Author: jhibbits
Date: Tue Apr 19 01:48:18 2016
New Revision: 298237
URL: https://svnweb.freebsd.org/changeset/base/298237

Log:
  Fix SMP booting for PowerPC Book-E
  
  Summary:
  PowerPC Book-E SMP is currently broken for unknown reasons.  Pull in
  Semihalf changes made circa 2012 for e500mc/e5500, which enable booting SMP.
  
  This eliminates the shared software TLB1 table, replacing it with
  tlb1_read_entry() function.
  
  This does not yet support ePAPR SMP booting, and doesn't handle resetting CPUs
  already released (ePAPR boot releases APs to a spin loop waiting on a specific
  address).  This will be addressed in the near future by using the MPIC to
  reset the AP into our own alternate boot address.
  
  This does include a change to the dpaa/dtsec(4) driver, to mark the portals as
  CPU-private.
  
  Test Plan:
  Tested on Amiga X5000/20 (P5020).  Boots, prints the following
  messages:
  
   Adding CPU 0, pir=0, awake=1
   Waking up CPU 1 (dev=1)
   Adding CPU 1, pir=20, awake=1
   SMP: AP CPU #1 launched
  
  top(1) shows CPU1 active.
  
  Obtained from:        Semihalf
  Relnotes:     Yes
  Differential Revision: https://reviews.freebsd.org/D5945

Modified:
  head/sys/dev/dpaa/portals_common.c
  head/sys/powerpc/booke/locore.S
  head/sys/powerpc/booke/pmap.c
  head/sys/powerpc/include/tlb.h
  head/sys/powerpc/mpc85xx/platform_mpc85xx.c
  head/sys/powerpc/powerpc/genassym.c
  head/sys/powerpc/powerpc/mp_machdep.c

Modified: head/sys/dev/dpaa/portals_common.c
==============================================================================
--- head/sys/dev/dpaa/portals_common.c  Tue Apr 19 01:25:35 2016        
(r298236)
+++ head/sys/dev/dpaa/portals_common.c  Tue Apr 19 01:48:18 2016        
(r298237)
@@ -75,8 +75,6 @@ dpaa_portal_alloc_res(device_t dev, stru
                sc->sc_rres[0] = bus_alloc_resource(dev,
                    SYS_RES_MEMORY, &sc->sc_rrid[0], rle->start + sc->sc_dp_pa,
                    rle->end + sc->sc_dp_pa, rle->count, RF_ACTIVE);
-               
pmap_change_attr((vm_offset_t)rman_get_bushandle(sc->sc_rres[0]),
-                   rle->count, VM_MEMATTR_CACHEABLE);
                if (sc->sc_rres[0] == NULL) {
                        device_printf(dev, "Could not allocate memory.\n");
                        return (ENXIO);

Modified: head/sys/powerpc/booke/locore.S
==============================================================================
--- head/sys/powerpc/booke/locore.S     Tue Apr 19 01:25:35 2016        
(r298236)
+++ head/sys/powerpc/booke/locore.S     Tue Apr 19 01:48:18 2016        
(r298237)
@@ -104,6 +104,10 @@ __start:
        mtmsr   %r3
        isync
 
+/*
+ * Initial HIDs configuration
+ */
+1:
        mfpvr   %r3
        rlwinm  %r3, %r3, 16, 16, 31
 
@@ -161,7 +165,6 @@ __start:
 /*
  * Create temporary mapping in AS=1 and switch to it
  */
-       addi    %r3, %r29, 1
        bl      tlb1_temp_mapping_as1
 
        mfmsr   %r3
@@ -197,7 +200,7 @@ __start:
        lis     %r3, KERNBASE@h
        ori     %r3, %r3, KERNBASE@l    /* EPN = KERNBASE */
 #ifdef SMP
-       ori     %r3, %r3, MAS2_M@l      /* WIMGE = 0b00100 */
+       ori     %r3, %r3, (_TLB_ENTRY_SHARED | MAS2_M)@l /* WIMGE = 0b00100 */
 #endif
        mtspr   SPR_MAS2, %r3
        isync
@@ -295,21 +298,19 @@ done_mapping:
 __boot_page:
        bl      1f
 
-       .globl  bp_ntlb1s
-bp_ntlb1s:
+       .globl  bp_trace
+bp_trace:
        .long   0
 
-       .globl  bp_tlb1
-bp_tlb1:
-       .space  4 * 3 * 64
-
-       .globl  bp_tlb1_end
-bp_tlb1_end:
+       .globl  bp_kernload
+bp_kernload:
+       .long   0
 
 /*
  * Initial configuration
  */
-1:     mflr    %r31            /* r31 hold the address of bp_ntlb1s */
+1:
+       mflr    %r31            /* r31 hold the address of bp_trace */
 
        /* Set HIDs */
        mfpvr   %r3
@@ -332,20 +333,7 @@ bp_tlb1_end:
 3:
        mtspr   SPR_HID0, %r4
        isync
-/*
- * E500mc and E5500 do not have HID1 register, so skip HID1 setup on
- * this core.
- */
-       cmpli   0, 0, %r3, FSL_E500mc
-       beq     1f
-       cmpli   0, 0, %r3, FSL_E5500
-       beq     1f
 
-       lis     %r3, HID1_E500_DEFAULT_SET@h
-       ori     %r3, %r3, HID1_E500_DEFAULT_SET@l
-       mtspr   SPR_HID1, %r3
-       isync
-1:
        /* Enable branch prediction */
        li      %r3, BUCSR_BPEN
        mtspr   SPR_BUCSR, %r3
@@ -367,7 +355,7 @@ bp_tlb1_end:
 /*
  * Create temporary translation in AS=1 and switch to it
  */
-       lwz     %r3, 0(%r31)
+
        bl      tlb1_temp_mapping_as1
 
        mfmsr   %r3
@@ -388,39 +376,46 @@ bp_tlb1_end:
 /*
  * Setup final mapping in TLB1[1] and switch to it
  */
-       lwz     %r6, 0(%r31)
-       addi    %r5, %r31, 4
-       li      %r4, 0
-
-4:     lis     %r3, MAS0_TLBSEL1@h
-       rlwimi  %r3, %r4, 16, 12, 15
+       /* Final kernel mapping, map in 64 MB of RAM */
+       lis     %r3, MAS0_TLBSEL1@h     /* Select TLB1 */
+       li      %r4, 0                  /* Entry 0 */
+       rlwimi  %r3, %r4, 16, 4, 15
        mtspr   SPR_MAS0, %r3
        isync
-       lwz     %r3, 0(%r5)
-       mtspr   SPR_MAS1, %r3
+
+       li      %r3, (TLB_SIZE_64M << MAS1_TSIZE_SHIFT)@l
+       oris    %r3, %r3, (MAS1_VALID | MAS1_IPROT)@h
+       mtspr   SPR_MAS1, %r3           /* note TS was not filled, so it's TS=0 
*/
        isync
-       lwz     %r3, 4(%r5)
+
+       lis     %r3, KERNBASE@h
+       ori     %r3, %r3, KERNBASE@l    /* EPN = KERNBASE */
+       ori     %r3, %r3, (_TLB_ENTRY_SHARED | MAS2_M)@l /* WIMGE = 0b00100 */
        mtspr   SPR_MAS2, %r3
        isync
-       lwz     %r3, 8(%r5)
+
+       /* Retrieve kernel load [physical] address from bp_kernload */
+       bl      4f
+       .long   bp_kernload
+       .long   __boot_page
+4:     mflr    %r3
+       lwz     %r4, 0(%r3)
+       lwz     %r5, 4(%r3)
+       rlwinm  %r3, %r3, 0, 0, 19
+       sub     %r4, %r4, %r5   /* offset of bp_kernload within __boot_page */
+       lwzx    %r3, %r4, %r3
+
+       /* Set RPN and protection */
+       ori     %r3, %r3, (MAS3_SX | MAS3_SW | MAS3_SR)@l
        mtspr   SPR_MAS3, %r3
        isync
        tlbwe
        isync
        msync
-       addi    %r5, %r5, 12
-       addi    %r4, %r4, 1
-       cmpw    %r4, %r6
-       blt     4b
 
        /* Switch to the final mapping */
        bl      5f
-       .long __boot_page-.
-5:     mflr    %r5
-       lwz     %r3,0(%r3)
-       add     %r5,%r5,%r3             /* __boot_page in r5 */
-       bl      6f
-6:     mflr    %r3
+5:     mflr    %r3
        rlwinm  %r3, %r3, 0, 0xfff      /* Offset from boot page start */
        add     %r3, %r3, %r5           /* Make this virtual address */
        addi    %r3, %r3, 32
@@ -449,6 +444,7 @@ bp_tlb1_end:
 1:     mflr    %r1
        lwz     %r2,0(%r1)
        add     %r1,%r1,%r2
+       stw     %r1, 0(%r1)
        addi    %r1, %r1, (TMPSTACKSZ - 16)
 
 /*
@@ -479,6 +475,7 @@ bp_tlb1_end:
 6:     b       6b
 #endif /* SMP */
 
+#if defined (BOOKE_E500)
 /*
  * Invalidate all entries in the given TLB.
  *
@@ -508,7 +505,7 @@ tlb1_find_current:
        isync
        tlbsx   0, %r3
        mfspr   %r17, SPR_MAS0
-       rlwinm  %r29, %r17, 16, 20, 31          /* MAS0[ESEL] -> r29 */
+       rlwinm  %r29, %r17, 16, 26, 31          /* MAS0[ESEL] -> r29 */
 
        /* Make sure we have IPROT set on the entry */
        mfspr   %r17, SPR_MAS1
@@ -541,14 +538,11 @@ tlb1_inval_entry:
        blr
 
 /*
- * r3          entry of temp translation
- * r29         entry of current translation
- * r28         returns temp entry passed in r3
- * r4-r5       scratched
+ * r29         current entry number
+ * r28         returned temp entry
+ * r3-r5       scratched
  */
 tlb1_temp_mapping_as1:
-       mr      %r28, %r3
-
        /* Read our current translation */
        lis     %r3, MAS0_TLBSEL1@h     /* Select TLB1 */
        rlwimi  %r3, %r29, 16, 10, 15   /* Select our current entry */
@@ -556,8 +550,14 @@ tlb1_temp_mapping_as1:
        isync
        tlbre
 
-       /* Prepare and write temp entry */
+       /*
+        * Prepare and write temp entry
+        *
+        * FIXME this is not robust against overflow i.e. when the current
+        * entry is the last in TLB1
+        */
        lis     %r3, MAS0_TLBSEL1@h     /* Select TLB1 */
+       addi    %r28, %r29, 1           /* Use next entry. */
        rlwimi  %r3, %r28, 16, 10, 15   /* Select temp entry */
        mtspr   SPR_MAS0, %r3
        isync
@@ -640,8 +640,19 @@ zero_mas8:
        mtspr   SPR_MAS8, %r20
        isync
        blr
+#endif
 
 #ifdef SMP
+.globl __boot_tlb1
+       /*
+        * The __boot_tlb1 table is used to hold BSP TLB1 entries
+        * marked with _TLB_ENTRY_SHARED flag during AP bootstrap.
+        * The BSP fills in the table in tlb_ap_prep() function. Next,
+        * AP loads its contents to TLB1 hardware in pmap_bootstrap_ap().
+        */
+__boot_tlb1:
+       .space TLB1_MAX_ENTRIES * TLB_ENTRY_SIZE
+
 __boot_page_padding:
        /*
         * Boot page needs to be exactly 4K, with the last word of this page
@@ -779,14 +790,8 @@ ENTRY(dataloss_erratum_access)
        mtspr   SPR_L1CSR1, %r11
        isync
 
-       mflr    %r9
-       bl      1f
-       .long 2f-.
-1:
-       mflr    %r5
-       lwz     %r8, 0(%r5)
-       mtlr    %r9
-       add     %r8, %r8, %r5
+       lis     %r8, 2f@h
+       ori     %r8, %r8, 2f@l
        icbtls  0, 0, %r8
        addi    %r9, %r8, 64
 

Modified: head/sys/powerpc/booke/pmap.c
==============================================================================
--- head/sys/powerpc/booke/pmap.c       Tue Apr 19 01:25:35 2016        
(r298236)
+++ head/sys/powerpc/booke/pmap.c       Tue Apr 19 01:48:18 2016        
(r298237)
@@ -110,10 +110,6 @@ extern unsigned char _end[];
 
 extern uint32_t *bootinfo;
 
-#ifdef SMP
-extern uint32_t bp_ntlb1s;
-#endif
-
 vm_paddr_t kernload;
 vm_offset_t kernstart;
 vm_size_t kernsize;
@@ -187,11 +183,6 @@ uint32_t tlb1_entries;
 #define TLB1_ENTRIES (tlb1_entries)
 #define TLB1_MAXENTRIES        64
 
-/* In-ram copy of the TLB1 */
-static tlb_entry_t tlb1[TLB1_MAXENTRIES];
-
-/* Next free entry in the TLB1 */
-static unsigned int tlb1_idx;
 static vm_offset_t tlb1_map_base = VM_MAXUSER_ADDRESS + PAGE_SIZE;
 
 static tlbtid_t tid_alloc(struct pmap *);
@@ -199,7 +190,8 @@ static void tid_flush(tlbtid_t tid);
 
 static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t);
 
-static void tlb1_write_entry(unsigned int);
+static void tlb1_read_entry(tlb_entry_t *, unsigned int);
+static void tlb1_write_entry(tlb_entry_t *, unsigned int);
 static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *);
 static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t);
 
@@ -271,6 +263,7 @@ static vm_offset_t ptbl_buf_pool_vabase;
 static struct ptbl_buf *ptbl_bufs;
 
 #ifdef SMP
+extern tlb_entry_t __boot_tlb1[];
 void pmap_bootstrap_ap(volatile uint32_t *);
 #endif
 
@@ -1369,6 +1362,22 @@ mmu_booke_bootstrap(mmu_t mmu, vm_offset
 }
 
 #ifdef SMP
+ void
+tlb1_ap_prep(void)
+{
+       tlb_entry_t *e, tmp;
+       unsigned int i;
+
+       /* Prepare TLB1 image for AP processors */
+       e = __boot_tlb1;
+       for (i = 0; i < TLB1_ENTRIES; i++) {
+               tlb1_read_entry(&tmp, i);
+
+               if ((tmp.mas1 & MAS1_VALID) && (tmp.mas2 & _TLB_ENTRY_SHARED))
+                       memcpy(e++, &tmp, sizeof(tmp));
+       }
+}
+
 void
 pmap_bootstrap_ap(volatile uint32_t *trcp __unused)
 {
@@ -1376,15 +1385,15 @@ pmap_bootstrap_ap(volatile uint32_t *trc
 
        /*
         * Finish TLB1 configuration: the BSP already set up its TLB1 and we
-        * have the snapshot of its contents in the s/w tlb1[] table, so use
-        * these values directly to (re)program AP's TLB1 hardware.
-        */
-       for (i = bp_ntlb1s; i < tlb1_idx; i++) {
-               /* Skip invalid entries */
-               if (!(tlb1[i].mas1 & MAS1_VALID))
-                       continue;
-
-               tlb1_write_entry(i);
+        * have the snapshot of its contents in the s/w __boot_tlb1[] table
+        * created by tlb1_ap_prep(), so use these values directly to
+        * (re)program AP's TLB1 hardware.
+        *
+        * Start at index 1 because index 0 has the kernel map.
+        */
+       for (i = 1; i < TLB1_ENTRIES; i++) {
+               if (__boot_tlb1[i].mas1 & MAS1_VALID)
+                       tlb1_write_entry(&__boot_tlb1[i], i);
        }
 
        set_mas4_defaults();
@@ -1429,14 +1438,16 @@ mmu_booke_extract(mmu_t mmu, pmap_t pmap
 static vm_paddr_t
 mmu_booke_kextract(mmu_t mmu, vm_offset_t va)
 {
+       tlb_entry_t e;
        int i;
 
        /* Check TLB1 mappings */
-       for (i = 0; i < tlb1_idx; i++) {
-               if (!(tlb1[i].mas1 & MAS1_VALID))
+       for (i = 0; i < TLB1_ENTRIES; i++) {
+               tlb1_read_entry(&e, i);
+               if (!(e.mas1 & MAS1_VALID))
                        continue;
-               if (va >= tlb1[i].virt && va < tlb1[i].virt + tlb1[i].size)
-                       return (tlb1[i].phys + (va - tlb1[i].virt));
+               if (va >= e.virt && va < e.virt + e.size)
+                       return (e.phys + (va - e.virt));
        }
 
        return (pte_vatopa(mmu, kernel_pmap, va));
@@ -2652,7 +2663,7 @@ mmu_booke_dev_direct_mapped(mmu_t mmu, v
         * This currently does not work for entries that
         * overlap TLB1 entries.
         */
-       for (i = 0; i < tlb1_idx; i ++) {
+       for (i = 0; i < TLB1_ENTRIES; i ++) {
                if (tlb1_iomapped(i, pa, size, &va) == 0)
                        return (0);
        }
@@ -2692,28 +2703,36 @@ mmu_booke_dumpsys_unmap(mmu_t mmu, vm_pa
        vm_paddr_t ppa;
        vm_offset_t ofs;
        vm_size_t gran;
+       tlb_entry_t e;
+       int i;
 
        /* Minidumps are based on virtual memory addresses. */
        /* Nothing to do... */
        if (do_minidump)
                return;
 
+       for (i = 0; i < TLB1_ENTRIES; i++) {
+               tlb1_read_entry(&e, i);
+               if (!(e.mas1 & MAS1_VALID))
+                       break;
+       }
+
        /* Raw physical memory dumps don't have a virtual address. */
-       tlb1_idx--;
-       tlb1[tlb1_idx].mas1 = 0;
-       tlb1[tlb1_idx].mas2 = 0;
-       tlb1[tlb1_idx].mas3 = 0;
-       tlb1_write_entry(tlb1_idx);
+       i--;
+       e.mas1 = 0;
+       e.mas2 = 0;
+       e.mas3 = 0;
+       tlb1_write_entry(&e, i);
 
        gran = 256 * 1024 * 1024;
        ppa = pa & ~(gran - 1);
        ofs = pa - ppa;
        if (sz > (gran - ofs)) {
-               tlb1_idx--;
-               tlb1[tlb1_idx].mas1 = 0;
-               tlb1[tlb1_idx].mas2 = 0;
-               tlb1[tlb1_idx].mas3 = 0;
-               tlb1_write_entry(tlb1_idx);
+               i--;
+               e.mas1 = 0;
+               e.mas2 = 0;
+               e.mas3 = 0;
+               tlb1_write_entry(&e, i);
        }
 }
 
@@ -2796,6 +2815,7 @@ mmu_booke_mapdev(mmu_t mmu, vm_paddr_t p
 static void *
 mmu_booke_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t 
ma)
 {
+       tlb_entry_t e;
        void *res;
        uintptr_t va, tmpva;
        vm_size_t sz;
@@ -2807,13 +2827,14 @@ mmu_booke_mapdev_attr(mmu_t mmu, vm_padd
         * requirement, but now only checks the easy case.
         */
        if (ma == VM_MEMATTR_DEFAULT) {
-               for (i = 0; i < tlb1_idx; i++) {
-                       if (!(tlb1[i].mas1 & MAS1_VALID))
+               for (i = 0; i < TLB1_ENTRIES; i++) {
+                       tlb1_read_entry(&e, i);
+                       if (!(e.mas1 & MAS1_VALID))
                                continue;
-                       if (pa >= tlb1[i].phys &&
-                           (pa + size) <= (tlb1[i].phys + tlb1[i].size))
-                               return (void *)(tlb1[i].virt +
-                                   (vm_offset_t)(pa - tlb1[i].phys));
+                       if (pa >= e.phys &&
+                           (pa + size) <= (e.phys + e.size))
+                               return (void *)(e.virt +
+                                   (vm_offset_t)(pa - e.phys));
                }
        }
 
@@ -2846,9 +2867,10 @@ mmu_booke_mapdev_attr(mmu_t mmu, vm_padd
                        } while (va % sz != 0);
                }
                if (bootverbose)
-                       printf("Wiring VA=%x to PA=%jx (size=%x), "
-                           "using TLB1[%d]\n", va, (uintmax_t)pa, sz, 
tlb1_idx);
-               tlb1_set_entry(va, pa, sz, tlb_calc_wimg(pa, ma));
+                       printf("Wiring VA=%x to PA=%jx (size=%x)\n",
+                           va, (uintmax_t)pa, sz);
+               tlb1_set_entry(va, pa, sz,
+                   _TLB_ENTRY_SHARED | tlb_calc_wimg(pa, ma));
                size -= sz;
                pa += sz;
                va += sz;
@@ -2912,30 +2934,34 @@ mmu_booke_change_attr(mmu_t mmu, vm_offs
        vm_offset_t va;
        pte_t *pte;
        int i, j;
+       tlb_entry_t e;
 
        /* Check TLB1 mappings */
-       for (i = 0; i < tlb1_idx; i++) {
-               if (!(tlb1[i].mas1 & MAS1_VALID))
+       for (i = 0; i < TLB1_ENTRIES; i++) {
+               tlb1_read_entry(&e, i);
+               if (!(e.mas1 & MAS1_VALID))
                        continue;
-               if (addr >= tlb1[i].virt && addr < tlb1[i].virt + tlb1[i].size)
+               if (addr >= e.virt && addr < e.virt + e.size)
                        break;
        }
-       if (i < tlb1_idx) {
+       if (i < TLB1_ENTRIES) {
                /* Only allow full mappings to be modified for now. */
                /* Validate the range. */
-               for (j = i, va = addr; va < addr + sz; va += tlb1[j].size, j++) 
{
-                       if (va != tlb1[j].virt || (sz - (va - addr) < 
tlb1[j].size))
+               for (j = i, va = addr; va < addr + sz; va += e.size, j++) {
+                       tlb1_read_entry(&e, j);
+                       if (va != e.virt || (sz - (va - addr) < e.size))
                                return (EINVAL);
                }
-               for (va = addr; va < addr + sz; va += tlb1[i].size, i++) {
-                       tlb1[i].mas2 &= ~MAS2_WIMGE_MASK;
-                       tlb1[i].mas2 |= tlb_calc_wimg(tlb1[i].phys, mode);
+               for (va = addr; va < addr + sz; va += e.size, i++) {
+                       tlb1_read_entry(&e, i);
+                       e.mas2 &= ~MAS2_WIMGE_MASK;
+                       e.mas2 |= tlb_calc_wimg(e.phys, mode);
 
                        /*
                         * Write it out to the TLB.  Should really re-sync with 
other
                         * cores.
                         */
-                       tlb1_write_entry(i);
+                       tlb1_write_entry(&e, i);
                }
                return (0);
        }
@@ -3118,12 +3144,48 @@ tlb0_print_tlbentries(void)
  *             windows, other devices mappings.
  */
 
+ /*
+ * Read an entry from given TLB1 slot.
+ */
+void
+tlb1_read_entry(tlb_entry_t *entry, unsigned int slot)
+{
+       uint32_t mas0;
+
+       KASSERT((entry != NULL), ("%s(): Entry is NULL!", __func__));
+
+       mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(slot);
+       mtspr(SPR_MAS0, mas0);
+       __asm __volatile("isync; tlbre");
+
+       entry->mas1 = mfspr(SPR_MAS1);
+       entry->mas2 = mfspr(SPR_MAS2);
+       entry->mas3 = mfspr(SPR_MAS3);
+
+       switch ((mfpvr() >> 16) & 0xFFFF) {
+       case FSL_E500v2:
+       case FSL_E500mc:
+       case FSL_E5500:
+               entry->mas7 = mfspr(SPR_MAS7);
+               break;
+       default:
+               entry->mas7 = 0;
+               break;
+       }
+
+       entry->virt = entry->mas2 & MAS2_EPN_MASK;
+       entry->phys = ((vm_paddr_t)(entry->mas7 & MAS7_RPN) << 32) |
+           (entry->mas3 & MAS3_RPN);
+       entry->size =
+           tsize2size((entry->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT);
+}
+
 /*
  * Write given entry to TLB1 hardware.
  * Use 32 bit pa, clear 4 high-order bits of RPN (mas7).
  */
 static void
-tlb1_write_entry(unsigned int idx)
+tlb1_write_entry(tlb_entry_t *e, unsigned int idx)
 {
        uint32_t mas0;
 
@@ -3135,11 +3197,11 @@ tlb1_write_entry(unsigned int idx)
 
        mtspr(SPR_MAS0, mas0);
        __asm __volatile("isync");
-       mtspr(SPR_MAS1, tlb1[idx].mas1);
+       mtspr(SPR_MAS1, e->mas1);
        __asm __volatile("isync");
-       mtspr(SPR_MAS2, tlb1[idx].mas2);
+       mtspr(SPR_MAS2, e->mas2);
        __asm __volatile("isync");
-       mtspr(SPR_MAS3, tlb1[idx].mas3);
+       mtspr(SPR_MAS3, e->mas3);
        __asm __volatile("isync");
        switch ((mfpvr() >> 16) & 0xFFFF) {
        case FSL_E500mc:
@@ -3148,7 +3210,7 @@ tlb1_write_entry(unsigned int idx)
                __asm __volatile("isync");
                /* FALLTHROUGH */
        case FSL_E500v2:
-               mtspr(SPR_MAS7, tlb1[idx].mas7);
+               mtspr(SPR_MAS7, e->mas7);
                __asm __volatile("isync");
                break;
        default:
@@ -3207,10 +3269,21 @@ int
 tlb1_set_entry(vm_offset_t va, vm_paddr_t pa, vm_size_t size,
     uint32_t flags)
 {
+       tlb_entry_t e;
        uint32_t ts, tid;
        int tsize, index;
 
-       index = atomic_fetchadd_int(&tlb1_idx, 1);
+       for (index = 0; index < TLB1_ENTRIES; index++) {
+               tlb1_read_entry(&e, index);
+               if ((e.mas1 & MAS1_VALID) == 0)
+                       break;
+               /* Check if we're just updating the flags, and update them. */
+               if (e.phys == pa && e.virt == va && e.size == size) {
+                       e.mas2 = (va & MAS2_EPN_MASK) | flags;
+                       tlb1_write_entry(&e, index);
+                       return (0);
+               }
+       }
        if (index >= TLB1_ENTRIES) {
                printf("tlb1_set_entry: TLB1 full!\n");
                return (-1);
@@ -3223,23 +3296,18 @@ tlb1_set_entry(vm_offset_t va, vm_paddr_
        /* XXX TS is hard coded to 0 for now as we only use single address 
space */
        ts = (0 << MAS1_TS_SHIFT) & MAS1_TS_MASK;
 
-       /*
-        * Atomicity is preserved by the atomic increment above since nothing
-        * is ever removed from tlb1.
-        */
-
-       tlb1[index].phys = pa;
-       tlb1[index].virt = va;
-       tlb1[index].size = size;
-       tlb1[index].mas1 = MAS1_VALID | MAS1_IPROT | ts | tid;
-       tlb1[index].mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK);
-       tlb1[index].mas2 = (va & MAS2_EPN_MASK) | flags;
+       e.phys = pa;
+       e.virt = va;
+       e.size = size;
+       e.mas1 = MAS1_VALID | MAS1_IPROT | ts | tid;
+       e.mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK);
+       e.mas2 = (va & MAS2_EPN_MASK) | flags;
 
        /* Set supervisor RWX permission bits */
-       tlb1[index].mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX;
-       tlb1[index].mas7 = (pa >> 32) & MAS7_RPN;
+       e.mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX;
+       e.mas7 = (pa >> 32) & MAS7_RPN;
 
-       tlb1_write_entry(index);
+       tlb1_write_entry(&e, index);
 
        /*
         * XXX in general TLB1 updates should be propagated between CPUs,
@@ -3302,7 +3370,8 @@ tlb1_mapin_region(vm_offset_t va, vm_pad
        for (idx = 0; idx < nents; idx++) {
                pgsz = pgs[idx];
                debugf("%u: %llx -> %x, size=%x\n", idx, pa, va, pgsz);
-               tlb1_set_entry(va, pa, pgsz, _TLB_ENTRY_MEM);
+               tlb1_set_entry(va, pa, pgsz,
+                   _TLB_ENTRY_SHARED | _TLB_ENTRY_MEM);
                pa += pgsz;
                va += pgsz;
        }
@@ -3326,9 +3395,6 @@ tlb1_init()
 {
        uint32_t mas0, mas1, mas2, mas3, mas7;
        uint32_t tsz;
-       int i;
-
-       tlb1_idx = 1;
 
        tlb1_get_tlbconf();
 
@@ -3341,27 +3407,11 @@ tlb1_init()
        mas3 = mfspr(SPR_MAS3);
        mas7 = mfspr(SPR_MAS7);
 
-       tlb1[0].mas1 = mas1;
-       tlb1[0].mas2 = mfspr(SPR_MAS2);
-       tlb1[0].mas3 = mas3;
-       tlb1[0].mas7 = mas7;
-       tlb1[0].virt = mas2 & MAS2_EPN_MASK;
-       tlb1[0].phys =  ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) |
+       kernload =  ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) |
            (mas3 & MAS3_RPN);
 
-       kernload = tlb1[0].phys;
-
        tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
-       tlb1[0].size = (tsz > 0) ? tsize2size(tsz) : 0;
-       kernsize += tlb1[0].size;
-
-#ifdef SMP
-       bp_ntlb1s = tlb1_idx;
-#endif
-
-       /* Purge the remaining entries */
-       for (i = tlb1_idx; i < TLB1_ENTRIES; i++)
-               tlb1_write_entry(i);
+       kernsize += (tsz > 0) ? tsize2size(tsz) : 0;
 
        /* Setup TLB miss defaults */
        set_mas4_defaults();
@@ -3373,15 +3423,17 @@ pmap_early_io_map(vm_paddr_t pa, vm_size
        vm_paddr_t pa_base;
        vm_offset_t va, sz;
        int i;
+       tlb_entry_t e;
 
        KASSERT(!pmap_bootstrapped, ("Do not use after PMAP is up!"));
        
-       for (i = 0; i < tlb1_idx; i++) {
-               if (!(tlb1[i].mas1 & MAS1_VALID))
+       for (i = 0; i < TLB1_ENTRIES; i++) {
+               tlb1_read_entry(&e, i);
+               if (!(e.mas1 & MAS1_VALID))
                        continue;
-               if (pa >= tlb1[i].phys && (pa + size) <=
-                   (tlb1[i].phys + tlb1[i].size))
-                       return (tlb1[i].virt + (pa - tlb1[i].phys));
+               if (pa >= e.phys && (pa + size) <=
+                   (e.phys + e.size))
+                       return (e.virt + (pa - e.phys));
        }
 
        pa_base = rounddown(pa, PAGE_SIZE);
@@ -3391,16 +3443,13 @@ pmap_early_io_map(vm_paddr_t pa, vm_size
 
        do {
                sz = 1 << (ilog2(size) & ~1);
-               tlb1_set_entry(tlb1_map_base, pa_base, sz, _TLB_ENTRY_IO);
+               tlb1_set_entry(tlb1_map_base, pa_base, sz,
+                   _TLB_ENTRY_SHARED | _TLB_ENTRY_IO);
                size -= sz;
                pa_base += sz;
                tlb1_map_base += sz;
        } while (size > 0);
 
-#ifdef SMP
-       bp_ntlb1s = tlb1_idx;
-#endif
-
        return (va);
 }
 
@@ -3450,20 +3499,6 @@ tlb1_print_tlbentries(void)
 }
 
 /*
- * Print out contents of the in-ram tlb1 table.
- */
-void
-tlb1_print_entries(void)
-{
-       int i;
-
-       debugf("tlb1[] table entries:\n");
-       for (i = 0; i < TLB1_ENTRIES; i++)
-               tlb_print_entry(i, tlb1[i].mas1, tlb1[i].mas2, tlb1[i].mas3,
-                   tlb1[i].mas7);
-}
-
-/*
  * Return 0 if the physical IO range is encompassed by one of the
  * the TLB1 entries, otherwise return related error code.
  */
@@ -3475,39 +3510,41 @@ tlb1_iomapped(int i, vm_paddr_t pa, vm_s
        vm_paddr_t pa_end;
        unsigned int entry_tsize;
        vm_size_t entry_size;
+       tlb_entry_t e;
 
        *va = (vm_offset_t)NULL;
 
+       tlb1_read_entry(&e, i);
        /* Skip invalid entries */
-       if (!(tlb1[i].mas1 & MAS1_VALID))
+       if (!(e.mas1 & MAS1_VALID))
                return (EINVAL);
 
        /*
         * The entry must be cache-inhibited, guarded, and r/w
         * so it can function as an i/o page
         */
-       prot = tlb1[i].mas2 & (MAS2_I | MAS2_G);
+       prot = e.mas2 & (MAS2_I | MAS2_G);
        if (prot != (MAS2_I | MAS2_G))
                return (EPERM);
 
-       prot = tlb1[i].mas3 & (MAS3_SR | MAS3_SW);
+       prot = e.mas3 & (MAS3_SR | MAS3_SW);
        if (prot != (MAS3_SR | MAS3_SW))
                return (EPERM);
 
        /* The address should be within the entry range. */
-       entry_tsize = (tlb1[i].mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
+       entry_tsize = (e.mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
        KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize"));
 
        entry_size = tsize2size(entry_tsize);
-       pa_start = (((vm_paddr_t)tlb1[i].mas7 & MAS7_RPN) << 32) | 
-           (tlb1[i].mas3 & MAS3_RPN);
+       pa_start = (((vm_paddr_t)e.mas7 & MAS7_RPN) << 32) | 
+           (e.mas3 & MAS3_RPN);
        pa_end = pa_start + entry_size;
 
        if ((pa < pa_start) || ((pa + size) > pa_end))
                return (ERANGE);
 
        /* Return virtual address of this mapping. */
-       *va = (tlb1[i].mas2 & MAS2_EPN_MASK) + (pa - pa_start);
+       *va = (e.mas2 & MAS2_EPN_MASK) + (pa - pa_start);
        return (0);
 }
 

Modified: head/sys/powerpc/include/tlb.h
==============================================================================
--- head/sys/powerpc/include/tlb.h      Tue Apr 19 01:25:35 2016        
(r298236)
+++ head/sys/powerpc/include/tlb.h      Tue Apr 19 01:48:18 2016        
(r298237)
@@ -74,7 +74,7 @@
 #define        MAS2_M                  0x00000004
 #define        MAS2_G                  0x00000002
 #define        MAS2_E                  0x00000001
-#define        MAS2_WIMGE_MASK         0x0000001F
+#define        MAS2_WIMGE_MASK         0x0000007F
 
 #define        MAS3_RPN                0xFFFFF000
 #define        MAS3_RPN_SHIFT          12
@@ -120,9 +120,17 @@
  */
 #define KERNEL_REGION_MAX_TLB_ENTRIES   4
 
+/*
+ * Use MAS2_X0 to mark entries which will be copied
+ * to AP CPUs during SMP bootstrap. As result entries
+ * marked with _TLB_ENTRY_SHARED will be shared by all CPUs.
+ */
+#define _TLB_ENTRY_SHARED      (MAS2_X0)       /* XXX under SMP? */
 #define _TLB_ENTRY_IO  (MAS2_I | MAS2_G)
 #define _TLB_ENTRY_MEM (MAS2_M)
 
+#define TLB1_MAX_ENTRIES       64
+
 #if !defined(LOCORE)
 typedef struct tlb_entry {
        vm_paddr_t phys;
@@ -211,6 +219,7 @@ struct pmap;
 
 void tlb_lock(uint32_t *);
 void tlb_unlock(uint32_t *);
+void tlb1_ap_prep(void);
 int  tlb1_set_entry(vm_offset_t, vm_paddr_t, vm_size_t, uint32_t);
 
 #endif /* !LOCORE */

Modified: head/sys/powerpc/mpc85xx/platform_mpc85xx.c
==============================================================================
--- head/sys/powerpc/mpc85xx/platform_mpc85xx.c Tue Apr 19 01:25:35 2016        
(r298236)
+++ head/sys/powerpc/mpc85xx/platform_mpc85xx.c Tue Apr 19 01:48:18 2016        
(r298237)
@@ -62,9 +62,7 @@ __FBSDID("$FreeBSD$");
 extern void *ap_pcpu;
 extern vm_paddr_t kernload;            /* Kernel physical load address */
 extern uint8_t __boot_page[];          /* Boot page body */
-extern uint32_t bp_ntlb1s;
-extern uint32_t bp_tlb1[];
-extern uint32_t bp_tlb1_end[];
+extern uint32_t bp_kernload;
 #endif
 
 extern uint32_t *bootinfo;
@@ -321,10 +319,9 @@ static int
 mpc85xx_smp_start_cpu(platform_t plat, struct pcpu *pc)
 {
 #ifdef SMP
-       uint32_t *tlb1;
        vm_paddr_t bptr;
        uint32_t reg;
-       int i, timeout;
+       int timeout;
        uintptr_t brr;
        int cpuid;
 
@@ -344,6 +341,7 @@ mpc85xx_smp_start_cpu(platform_t plat, s
        brr = OCP85XX_EEBPCR;
        cpuid = pc->pc_cpuid + 24;
 #endif
+       bp_kernload = kernload;
        reg = ccsr_read4(brr);
        if ((reg & (1 << cpuid)) != 0) {
                printf("SMP: CPU %d already out of hold-off state!\n",
@@ -354,20 +352,6 @@ mpc85xx_smp_start_cpu(platform_t plat, s
        ap_pcpu = pc;
        __asm __volatile("msync; isync");
 
-       i = 0;
-       tlb1 = bp_tlb1;
-       while (i < bp_ntlb1s && tlb1 < bp_tlb1_end) {
-               mtspr(SPR_MAS0, MAS0_TLBSEL(1) | MAS0_ESEL(i));
-               __asm __volatile("isync; tlbre");
-               tlb1[0] = mfspr(SPR_MAS1);
-               tlb1[1] = mfspr(SPR_MAS2);
-               tlb1[2] = mfspr(SPR_MAS3);
-               i++;
-               tlb1 += 3;
-       }
-       if (i < bp_ntlb1s)
-               bp_ntlb1s = i;
-
        /* Flush caches to have our changes hit DRAM. */
        cpu_flush_dcache(__boot_page, 4096);
 

Modified: head/sys/powerpc/powerpc/genassym.c
==============================================================================
--- head/sys/powerpc/powerpc/genassym.c Tue Apr 19 01:25:35 2016        
(r298236)
+++ head/sys/powerpc/powerpc/genassym.c Tue Apr 19 01:48:18 2016        
(r298237)
@@ -125,7 +125,7 @@ ASSYM(PM_PDIR, offsetof(struct pmap, pm_
 ASSYM(PTE_RPN, 0);
 ASSYM(PTE_FLAGS, sizeof(uint32_t));
 #if defined(BOOKE_E500)
-ASSYM(TLB0_ENTRY_SIZE, sizeof(struct tlb_entry));
+ASSYM(TLB_ENTRY_SIZE, sizeof(struct tlb_entry));
 #endif
 #endif
 

Modified: head/sys/powerpc/powerpc/mp_machdep.c
==============================================================================
--- head/sys/powerpc/powerpc/mp_machdep.c       Tue Apr 19 01:25:35 2016        
(r298236)
+++ head/sys/powerpc/powerpc/mp_machdep.c       Tue Apr 19 01:48:18 2016        
(r298237)
@@ -212,6 +212,9 @@ cpu_mp_unleash(void *dummy)
 
        cpus = 0;
        smp_cpus = 0;
+#ifdef BOOKE
+       tlb1_ap_prep();
+#endif
        STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
                cpus++;
                if (!pc->pc_bsp) {
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to