Author: alc
Date: Sat Apr 10 22:24:03 2010
New Revision: 206462
URL: http://svn.freebsd.org/changeset/base/206462

Log:
  MFC r204907, r204913, r205402, r205573, r205778
    Implement AMD's recommended workaround for Erratum 383 on Family 10h
    processors.
  
    Enable machine check exceptions by default.
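
For reference, the knobs added by this merge can be exercised as follows. This is a minimal sketch based only on the tunable and sysctl names visible in the diff below, using the standard loader.conf(5) and sysctl(8) mechanisms:

  # /boot/loader.conf: MCA is now enabled by default; setting
  # hw.mca.amd10h_L1TP=0 disables L1TP error logging and thereby selects
  # the alternative Erratum 383 workaround instead of the recommended one.
  hw.mca.enabled="1"
  hw.mca.amd10h_L1TP="0"

  # At runtime, check whether the recommended workaround is in effect:
  sysctl hw.mca.erratum383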

Modified:
  stable/7/sys/amd64/amd64/mca.c
  stable/7/sys/amd64/amd64/pmap.c
  stable/7/sys/amd64/include/md_var.h
  stable/7/sys/amd64/include/specialreg.h
  stable/7/sys/i386/i386/mca.c
  stable/7/sys/i386/i386/pmap.c
  stable/7/sys/i386/include/md_var.h
  stable/7/sys/i386/include/specialreg.h
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/amd64/amd64/mca.c
==============================================================================
--- stable/7/sys/amd64/amd64/mca.c      Sat Apr 10 22:11:01 2010        (r206461)
+++ stable/7/sys/amd64/amd64/mca.c      Sat Apr 10 22:24:03 2010        (r206462)
@@ -60,11 +60,20 @@ static int mca_count;               /* Number of reco
 
 SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL, "Machine Check Architecture");
 
-static int mca_enabled = 0;
+static int mca_enabled = 1;
 TUNABLE_INT("hw.mca.enabled", &mca_enabled);
 SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0,
     "Administrative toggle for machine check support");
 
+static int amd10h_L1TP = 1;
+TUNABLE_INT("hw.mca.amd10h_L1TP", &amd10h_L1TP);
+SYSCTL_INT(_hw_mca, OID_AUTO, amd10h_L1TP, CTLFLAG_RDTUN, &amd10h_L1TP, 0,
+    "Administrative toggle for logging of level one TLB parity (L1TP) errors");
+
+int workaround_erratum383;
+SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RD, &workaround_erratum383, 0,
+    "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");
+
 static STAILQ_HEAD(, mca_internal) mca_records;
 static struct callout mca_timer;
 static int mca_ticks = 3600;   /* Check hourly by default. */
@@ -527,7 +536,7 @@ void
 mca_init(void)
 {
        uint64_t mcg_cap;
-       uint64_t ctl;
+       uint64_t ctl, mask;
        int skip;
        int i;
 
@@ -535,6 +544,15 @@ mca_init(void)
        if (!mca_enabled || !(cpu_feature & CPUID_MCE))
                return;
 
+       /*
+        * On AMD Family 10h processors, unless logging of level one TLB
+        * parity (L1TP) errors is disabled, enable the recommended workaround
+        * for Erratum 383.
+        */
+       if (cpu_vendor_id == CPU_VENDOR_AMD &&
+           CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP)
+               workaround_erratum383 = 1;
+
        if (cpu_feature & CPUID_MCA) {
                if (PCPU_GET(cpuid) == 0)
                        mca_setup();
@@ -545,6 +563,19 @@ mca_init(void)
                        /* Enable MCA features. */
                        wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE);
 
+               /*
+                * Disable logging of level one TLB parity (L1TP) errors by
+                * the data cache as an alternative workaround for AMD Family
+                * 10h Erratum 383.  Unlike the recommended workaround, there
+                * is no performance penalty to this workaround.  However,
+                * L1TP errors will go unreported.
+                */
+               if (cpu_vendor_id == CPU_VENDOR_AMD &&
+                   CPUID_TO_FAMILY(cpu_id) == 0x10 && !amd10h_L1TP) {
+                       mask = rdmsr(MSR_MC0_CTL_MASK);
+                       if ((mask & (1UL << 5)) == 0)
+                               wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5));
+               }
                for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
                        /* By default enable logging of all errors. */
                        ctl = 0xffffffffffffffffUL;

Modified: stable/7/sys/amd64/amd64/pmap.c
==============================================================================
--- stable/7/sys/amd64/amd64/pmap.c     Sat Apr 10 22:11:01 2010        (r206461)
+++ stable/7/sys/amd64/amd64/pmap.c     Sat Apr 10 22:24:03 2010        (r206462)
@@ -7,7 +7,7 @@
  * All rights reserved.
  * Copyright (c) 2003 Peter Wemm
  * All rights reserved.
- * Copyright (c) 2005-2008 Alan L. Cox <a...@cs.rice.edu>
+ * Copyright (c) 2005-2010 Alan L. Cox <a...@cs.rice.edu>
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
@@ -249,6 +249,9 @@ static void pmap_remove_entry(struct pma
 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
     vm_page_t m);
+static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
+    pd_entry_t newpde);
+static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
 
 static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags);
 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
@@ -658,13 +661,13 @@ pmap_init(void)
        pv_entry_high_water = 9 * (pv_entry_max / 10);
 
        /*
-        * Disable large page mappings by default if the kernel is running in
-        * a virtual machine on an AMD Family 10h processor.  This is a work-
-        * around for Erratum 383.
+        * If the kernel is running in a virtual machine on an AMD Family 10h
+        * processor, then it must assume that MCA is enabled by the virtual
+        * machine monitor.
         */
        if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
            CPUID_TO_FAMILY(cpu_id) == 0x10)
-               pg_ps_enabled = 0;
+               workaround_erratum383 = 1;
 
        /*
         * Are large page mappings enabled?
@@ -809,6 +812,45 @@ pmap_cache_bits(int mode, boolean_t is_p
                cache_bits |= PG_NC_PWT;
        return (cache_bits);
 }
+
+/*
+ * After changing the page size for the specified virtual address in the page
+ * table, flush the corresponding entries from the processor's TLB.  Only the
+ * calling processor's TLB is affected.
+ *
+ * The calling thread must be pinned to a processor.
+ */
+static void
+pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
+{
+       u_long cr4;
+
+       if ((newpde & PG_PS) == 0)
+               /* Demotion: flush a specific 2MB page mapping. */
+               invlpg(va);
+       else if ((newpde & PG_G) == 0)
+               /*
+                * Promotion: flush every 4KB page mapping from the TLB
+                * because there are too many to flush individually.
+                */
+               invltlb();
+       else {
+               /*
+                * Promotion: flush every 4KB page mapping from the TLB,
+                * including any global (PG_G) mappings.
+                */
+               cr4 = rcr4();
+               load_cr4(cr4 & ~CR4_PGE);
+               /*
+                * Although preemption at this point could be detrimental to
+                * performance, it would not lead to an error.  PG_G is simply
+                * ignored if CR4.PGE is clear.  Moreover, in case this block
+                * is re-entered, the load_cr4() either above or below will
+                * modify CR4.PGE flushing the TLB.
+                */
+               load_cr4(cr4 | CR4_PGE);
+       }
+}
 #ifdef SMP
 /*
  * For SMP, these functions have to use the IPI mechanism for coherence.
@@ -905,6 +947,69 @@ pmap_invalidate_cache(void)
        smp_cache_flush();
        sched_unpin();
 }
+
+struct pde_action {
+       cpumask_t store;        /* processor that updates the PDE */
+       cpumask_t invalidate;   /* processors that invalidate their TLB */
+       vm_offset_t va;
+       pd_entry_t *pde;
+       pd_entry_t newpde;
+};
+
+static void
+pmap_update_pde_action(void *arg)
+{
+       struct pde_action *act = arg;
+
+       if (act->store == PCPU_GET(cpumask))
+               pde_store(act->pde, act->newpde);
+}
+
+static void
+pmap_update_pde_teardown(void *arg)
+{
+       struct pde_action *act = arg;
+
+       if ((act->invalidate & PCPU_GET(cpumask)) != 0)
+               pmap_update_pde_invalidate(act->va, act->newpde);
+}
+
+/*
+ * Change the page size for the specified virtual address in a way that
+ * prevents any possibility of the TLB ever having two entries that map the
+ * same virtual address using different page sizes.  This is the recommended
+ * workaround for Erratum 383 on AMD Family 10h processors.  It prevents a
+ * machine check exception for a TLB state that is improperly diagnosed as a
+ * hardware error.
+ */
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+       struct pde_action act;
+       cpumask_t active, cpumask;
+
+       sched_pin();
+       cpumask = PCPU_GET(cpumask);
+       if (pmap == kernel_pmap)
+               active = all_cpus;
+       else
+               active = pmap->pm_active;
+       if ((active & PCPU_GET(other_cpus)) != 0) {
+               act.store = cpumask;
+               act.invalidate = active;
+               act.va = va;
+               act.pde = pde;
+               act.newpde = newpde;
+               smp_rendezvous_cpus(cpumask | active,
+                   smp_no_rendevous_barrier, pmap_update_pde_action,
+                   pmap_update_pde_teardown, &act);
+       } else {
+               pde_store(pde, newpde);
+               if ((active & cpumask) != 0)
+                       pmap_update_pde_invalidate(va, newpde);
+       }
+       sched_unpin();
+}
 #else /* !SMP */
 /*
  * Normal, non-SMP, invalidation functions.
@@ -942,6 +1047,15 @@ pmap_invalidate_cache(void)
 
        wbinvd();
 }
+
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+
+       pde_store(pde, newpde);
+       if (pmap == kernel_pmap || pmap->pm_active)
+               pmap_update_pde_invalidate(va, newpde);
+}
 #endif /* !SMP */
 
 static void
@@ -2324,7 +2438,10 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t 
         * processor changing the setting of PG_A and/or PG_M between
         * the read above and the store below. 
         */
-       pde_store(pde, newpde); 
+       if (workaround_erratum383)
+               pmap_update_pde(pmap, va, pde, newpde);
+       else
+               pde_store(pde, newpde);
 
        /*
         * Invalidate a stale recursive mapping of the page table page.
@@ -2940,7 +3057,10 @@ setpte:
        /*
         * Map the superpage.
         */
-       pde_store(pde, PG_PS | newpde);
+       if (workaround_erratum383)
+               pmap_update_pde(pmap, va, pde, PG_PS | newpde);
+       else
+               pde_store(pde, PG_PS | newpde);
 
        pmap_pde_promotions++;
        CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx"

Modified: stable/7/sys/amd64/include/md_var.h
==============================================================================
--- stable/7/sys/amd64/include/md_var.h Sat Apr 10 22:11:01 2010        (r206461)
+++ stable/7/sys/amd64/include/md_var.h Sat Apr 10 22:24:03 2010        (r206462)
@@ -61,6 +61,7 @@ extern        char    sigcode[];
 extern int     szsigcode;
 extern uint64_t *vm_page_dump;
 extern int     vm_page_dump_size;
+extern int     workaround_erratum383;
 
 typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss);
 struct thread;

Modified: stable/7/sys/amd64/include/specialreg.h
==============================================================================
--- stable/7/sys/amd64/include/specialreg.h     Sat Apr 10 22:11:01 2010        (r206461)
+++ stable/7/sys/amd64/include/specialreg.h     Sat Apr 10 22:24:03 2010        (r206462)
@@ -499,6 +499,7 @@
 #define        MSR_TOP_MEM     0xc001001a      /* boundary for ram below 4G */
 #define        MSR_TOP_MEM2    0xc001001d      /* boundary for ram above 4G */
 #define        MSR_K8_UCODE_UPDATE     0xc0010020      /* update microcode */
+#define        MSR_MC0_CTL_MASK        0xc0010044
 
 /* VIA ACE crypto featureset: for via_feature_rng */
 #define        VIA_HAS_RNG             1       /* cpu has RNG */

Modified: stable/7/sys/i386/i386/mca.c
==============================================================================
--- stable/7/sys/i386/i386/mca.c        Sat Apr 10 22:11:01 2010        (r206461)
+++ stable/7/sys/i386/i386/mca.c        Sat Apr 10 22:24:03 2010        (r206462)
@@ -60,11 +60,20 @@ static int mca_count;               /* Number of reco
 
 SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL, "Machine Check Architecture");
 
-static int mca_enabled = 0;
+static int mca_enabled = 1;
 TUNABLE_INT("hw.mca.enabled", &mca_enabled);
 SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0,
     "Administrative toggle for machine check support");
 
+static int amd10h_L1TP = 1;
+TUNABLE_INT("hw.mca.amd10h_L1TP", &amd10h_L1TP);
+SYSCTL_INT(_hw_mca, OID_AUTO, amd10h_L1TP, CTLFLAG_RDTUN, &amd10h_L1TP, 0,
+    "Administrative toggle for logging of level one TLB parity (L1TP) errors");
+
+int workaround_erratum383;
+SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RD, &workaround_erratum383, 0,
+    "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");
+
 static STAILQ_HEAD(, mca_internal) mca_records;
 static struct callout mca_timer;
 static int mca_ticks = 3600;   /* Check hourly by default. */
@@ -527,7 +536,7 @@ void
 mca_init(void)
 {
        uint64_t mcg_cap;
-       uint64_t ctl;
+       uint64_t ctl, mask;
        int skip;
        int i;
 
@@ -535,6 +544,15 @@ mca_init(void)
        if (!mca_enabled || !(cpu_feature & CPUID_MCE))
                return;
 
+       /*
+        * On AMD Family 10h processors, unless logging of level one TLB
+        * parity (L1TP) errors is disabled, enable the recommended workaround
+        * for Erratum 383.
+        */
+       if (cpu_vendor_id == CPU_VENDOR_AMD &&
+           CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP)
+               workaround_erratum383 = 1;
+
        if (cpu_feature & CPUID_MCA) {
                if (PCPU_GET(cpuid) == 0)
                        mca_setup();
@@ -545,6 +563,19 @@ mca_init(void)
                        /* Enable MCA features. */
                        wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE);
 
+               /*
+                * Disable logging of level one TLB parity (L1TP) errors by
+                * the data cache as an alternative workaround for AMD Family
+                * 10h Erratum 383.  Unlike the recommended workaround, there
+                * is no performance penalty to this workaround.  However,
+                * L1TP errors will go unreported.
+                */
+               if (cpu_vendor_id == CPU_VENDOR_AMD &&
+                   CPUID_TO_FAMILY(cpu_id) == 0x10 && !amd10h_L1TP) {
+                       mask = rdmsr(MSR_MC0_CTL_MASK);
+                       if ((mask & (1UL << 5)) == 0)
+                               wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5));
+               }
                for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
                        /* By default enable logging of all errors. */
                        ctl = 0xffffffffffffffffUL;

Modified: stable/7/sys/i386/i386/pmap.c
==============================================================================
--- stable/7/sys/i386/i386/pmap.c       Sat Apr 10 22:11:01 2010        (r206461)
+++ stable/7/sys/i386/i386/pmap.c       Sat Apr 10 22:24:03 2010        (r206462)
@@ -5,7 +5,7 @@
  * All rights reserved.
  * Copyright (c) 1994 David Greenman
  * All rights reserved.
- * Copyright (c) 2005-2008 Alan L. Cox <a...@cs.rice.edu>
+ * Copyright (c) 2005-2010 Alan L. Cox <a...@cs.rice.edu>
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
@@ -290,6 +290,7 @@ static void pmap_insert_pt_page(pmap_t p
 static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
 static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
 static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
+static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde);
 static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
 static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
 static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
@@ -308,6 +309,9 @@ static void pmap_remove_entry(struct pma
 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
     vm_page_t m);
+static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
+    pd_entry_t newpde);
+static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
 
 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
 
@@ -392,6 +396,13 @@ pmap_bootstrap(vm_paddr_t firstaddr)
        kernel_pmap->pm_active = -1;    /* don't allow deactivation */
        TAILQ_INIT(&kernel_pmap->pm_pvchunk);
        LIST_INIT(&allpmaps);
+
+       /*
+        * Request a spin mutex so that changes to allpmaps cannot be
+        * preempted by smp_rendezvous_cpus().  Otherwise,
+        * pmap_update_pde_kernel() could access allpmaps while it is
+        * being changed.
+        */
        mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
        mtx_lock_spin(&allpmaps_lock);
        LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
@@ -672,19 +683,21 @@ pmap_init(void)
        pv_entry_high_water = 9 * (pv_entry_max / 10);
 
        /*
-        * Disable large page mappings by default if the kernel is running in
-        * a virtual machine on an AMD Family 10h processor.  This is a work-
-        * around for Erratum 383.
+        * If the kernel is running in a virtual machine on an AMD Family 10h
+        * processor, then it must assume that MCA is enabled by the virtual
+        * machine monitor.
         */
        if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
            CPUID_TO_FAMILY(cpu_id) == 0x10)
-               pg_ps_enabled = 0;
+               workaround_erratum383 = 1;
 
        /*
-        * Are large page mappings enabled?
+        * Are large page mappings supported and enabled?
         */
        TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
-       if (pg_ps_enabled) {
+       if (pseflag == 0)
+               pg_ps_enabled = 0;
+       else if (pg_ps_enabled) {
                KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
                    ("pmap_init: can't assign to pagesizes[1]"));
                pagesizes[1] = NBPDR;
@@ -826,6 +839,69 @@ pmap_cache_bits(int mode, boolean_t is_p
                cache_bits |= PG_NC_PWT;
        return (cache_bits);
 }
+
+/*
+ * The caller is responsible for maintaining TLB consistency.
+ */
+static void
+pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde)
+{
+       pd_entry_t *pde;
+       pmap_t pmap;
+       boolean_t PTD_updated;
+
+       PTD_updated = FALSE;
+       mtx_lock_spin(&allpmaps_lock);
+       LIST_FOREACH(pmap, &allpmaps, pm_list) {
+               if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] &
+                   PG_FRAME))
+                       PTD_updated = TRUE;
+               pde = pmap_pde(pmap, va);
+               pde_store(pde, newpde);
+       }
+       mtx_unlock_spin(&allpmaps_lock);
+       KASSERT(PTD_updated,
+           ("pmap_kenter_pde: current page table is not in allpmaps"));
+}
+
+/*
+ * After changing the page size for the specified virtual address in the page
+ * table, flush the corresponding entries from the processor's TLB.  Only the
+ * calling processor's TLB is affected.
+ *
+ * The calling thread must be pinned to a processor.
+ */
+static void
+pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
+{
+       u_long cr4;
+
+       if ((newpde & PG_PS) == 0)
+               /* Demotion: flush a specific 2MB page mapping. */
+               invlpg(va);
+       else if ((newpde & PG_G) == 0)
+               /*
+                * Promotion: flush every 4KB page mapping from the TLB
+                * because there are too many to flush individually.
+                */
+               invltlb();
+       else {
+               /*
+                * Promotion: flush every 4KB page mapping from the TLB,
+                * including any global (PG_G) mappings.
+                */
+               cr4 = rcr4();
+               load_cr4(cr4 & ~CR4_PGE);
+               /*
+                * Although preemption at this point could be detrimental to
+                * performance, it would not lead to an error.  PG_G is simply
+                * ignored if CR4.PGE is clear.  Moreover, in case this block
+                * is re-entered, the load_cr4() either above or below will
+                * modify CR4.PGE flushing the TLB.
+                */
+               load_cr4(cr4 | CR4_PGE);
+       }
+}
 #ifdef SMP
 /*
  * For SMP, these functions have to use the IPI mechanism for coherence.
@@ -922,6 +998,92 @@ pmap_invalidate_cache(void)
        smp_cache_flush();
        sched_unpin();
 }
+
+struct pde_action {
+       cpumask_t store;        /* processor that updates the PDE */
+       cpumask_t invalidate;   /* processors that invalidate their TLB */
+       vm_offset_t va;
+       pd_entry_t *pde;
+       pd_entry_t newpde;
+};
+
+static void
+pmap_update_pde_kernel(void *arg)
+{
+       struct pde_action *act = arg;
+       pd_entry_t *pde;
+       pmap_t pmap;
+
+       if (act->store == PCPU_GET(cpumask))
+               /*
+                * Elsewhere, this operation requires allpmaps_lock for
+                * synchronization.  Here, it does not because it is being
+                * performed in the context of an all_cpus rendezvous.
+                */
+               LIST_FOREACH(pmap, &allpmaps, pm_list) {
+                       pde = pmap_pde(pmap, act->va);
+                       pde_store(pde, act->newpde);
+               }
+}
+
+static void
+pmap_update_pde_user(void *arg)
+{
+       struct pde_action *act = arg;
+
+       if (act->store == PCPU_GET(cpumask))
+               pde_store(act->pde, act->newpde);
+}
+
+static void
+pmap_update_pde_teardown(void *arg)
+{
+       struct pde_action *act = arg;
+
+       if ((act->invalidate & PCPU_GET(cpumask)) != 0)
+               pmap_update_pde_invalidate(act->va, act->newpde);
+}
+
+/*
+ * Change the page size for the specified virtual address in a way that
+ * prevents any possibility of the TLB ever having two entries that map the
+ * same virtual address using different page sizes.  This is the recommended
+ * workaround for Erratum 383 on AMD Family 10h processors.  It prevents a
+ * machine check exception for a TLB state that is improperly diagnosed as a
+ * hardware error.
+ */
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+       struct pde_action act;
+       cpumask_t active, cpumask;
+
+       sched_pin();
+       cpumask = PCPU_GET(cpumask);
+       if (pmap == kernel_pmap)
+               active = all_cpus;
+       else
+               active = pmap->pm_active;
+       if ((active & PCPU_GET(other_cpus)) != 0) {
+               act.store = cpumask;
+               act.invalidate = active;
+               act.va = va;
+               act.pde = pde;
+               act.newpde = newpde;
+               smp_rendezvous_cpus(cpumask | active,
+                   smp_no_rendevous_barrier, pmap == kernel_pmap ?
+                   pmap_update_pde_kernel : pmap_update_pde_user,
+                   pmap_update_pde_teardown, &act);
+       } else {
+               if (pmap == kernel_pmap)
+                       pmap_kenter_pde(va, newpde);
+               else
+                       pde_store(pde, newpde);
+               if ((active & cpumask) != 0)
+                       pmap_update_pde_invalidate(va, newpde);
+       }
+       sched_unpin();
+}
 #else /* !SMP */
 /*
  * Normal, non-SMP, 486+ invalidation functions.
@@ -959,6 +1121,18 @@ pmap_invalidate_cache(void)
 
        wbinvd();
 }
+
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+
+       if (pmap == kernel_pmap)
+               pmap_kenter_pde(va, newpde);
+       else
+               pde_store(pde, newpde);
+       if (pmap == kernel_pmap || pmap->pm_active)
+               pmap_update_pde_invalidate(va, newpde);
+}
 #endif /* !SMP */
 
 void
@@ -1833,12 +2007,9 @@ SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTL
 void
 pmap_growkernel(vm_offset_t addr)
 {
-       struct pmap *pmap;
        vm_paddr_t ptppaddr;
        vm_page_t nkpg;
        pd_entry_t newpdir;
-       pt_entry_t *pde;
-       boolean_t updated_PTD;
 
        mtx_assert(&kernel_map->system_mtx, MA_OWNED);
        if (kernel_vm_end == 0) {
@@ -1880,18 +2051,7 @@ pmap_growkernel(vm_offset_t addr)
                newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
                pdir_pde(KPTD, kernel_vm_end) = pgeflag | newpdir;
 
-               updated_PTD = FALSE;
-               mtx_lock_spin(&allpmaps_lock);
-               LIST_FOREACH(pmap, &allpmaps, pm_list) {
-                       if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] &
-                           PG_FRAME))
-                               updated_PTD = TRUE;
-                       pde = pmap_pde(pmap, kernel_vm_end);
-                       pde_store(pde, newpdir);
-               }
-               mtx_unlock_spin(&allpmaps_lock);
-               KASSERT(updated_PTD,
-                   ("pmap_growkernel: current page table is not in allpmaps"));
+               pmap_kenter_pde(kernel_vm_end, newpdir);
                kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
                if (kernel_vm_end - 1 >= kernel_map->max_offset) {
                        kernel_vm_end = kernel_map->max_offset;
@@ -2335,7 +2495,6 @@ static boolean_t
 pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 {
        pd_entry_t newpde, oldpde;
-       pmap_t allpmaps_entry;
        pt_entry_t *firstpte, newpte;
        vm_paddr_t mptepa;
        vm_page_t free, mpte;
@@ -2441,25 +2600,11 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t 
         * processor changing the setting of PG_A and/or PG_M between
         * the read above and the store below. 
         */
-       if (pmap == kernel_pmap) {
-               /*
-                * A harmless race exists between this loop and the bcopy()
-                * in pmap_pinit() that initializes the kernel segment of
-                * the new page table directory.  Specifically, that bcopy()
-                * may copy the new PDE from the PTD to the new page table
-                * before this loop updates that new page table.
-                */
-               mtx_lock_spin(&allpmaps_lock);
-               LIST_FOREACH(allpmaps_entry, &allpmaps, pm_list) {
-                       pde = pmap_pde(allpmaps_entry, va);
-                       KASSERT(*pde == newpde || (*pde & PG_PTE_PROMOTE) ==
-                           (oldpde & PG_PTE_PROMOTE),
-                           ("pmap_demote_pde: pde was %#jx, expected %#jx",
-                           (uintmax_t)*pde, (uintmax_t)oldpde));
-                       pde_store(pde, newpde);
-               }
-               mtx_unlock_spin(&allpmaps_lock);
-       } else
+       if (workaround_erratum383)
+               pmap_update_pde(pmap, va, pde, newpde);
+       else if (pmap == kernel_pmap)
+               pmap_kenter_pde(va, newpde);
+       else
                pde_store(pde, newpde); 
        if (firstpte == PADDR2)
                mtx_unlock(&PMAP2mutex);
@@ -2978,7 +3123,6 @@ static void
 pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 {
        pd_entry_t newpde;
-       pmap_t allpmaps_entry;
        pt_entry_t *firstpte, oldpte, pa, *pte;
        vm_offset_t oldpteva;
        vm_page_t mpte;
@@ -3082,14 +3226,11 @@ setpte:
        /*
         * Map the superpage.
         */
-       if (pmap == kernel_pmap) {
-               mtx_lock_spin(&allpmaps_lock);
-               LIST_FOREACH(allpmaps_entry, &allpmaps, pm_list) {
-                       pde = pmap_pde(allpmaps_entry, va);
-                       pde_store(pde, PG_PS | newpde);
-               }
-               mtx_unlock_spin(&allpmaps_lock);
-       } else
+       if (workaround_erratum383)
+               pmap_update_pde(pmap, va, pde, PG_PS | newpde);
+       else if (pmap == kernel_pmap)
+               pmap_kenter_pde(va, PG_PS | newpde);
+       else
                pde_store(pde, PG_PS | newpde);
 
        pmap_pde_promotions++;

Modified: stable/7/sys/i386/include/md_var.h
==============================================================================
--- stable/7/sys/i386/include/md_var.h  Sat Apr 10 22:11:01 2010        (r206461)
+++ stable/7/sys/i386/include/md_var.h  Sat Apr 10 22:24:03 2010        (r206462)
@@ -73,6 +73,7 @@ extern        int     szosigcode;
 #endif
 extern uint32_t *vm_page_dump;
 extern int     vm_page_dump_size;
+extern int     workaround_erratum383;
 
 typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss);
 struct thread;

Modified: stable/7/sys/i386/include/specialreg.h
==============================================================================
--- stable/7/sys/i386/include/specialreg.h      Sat Apr 10 22:11:01 2010        (r206461)
+++ stable/7/sys/i386/include/specialreg.h      Sat Apr 10 22:24:03 2010        (r206462)
@@ -544,6 +544,7 @@
 /* AMD64 MSR's */
 #define        MSR_EFER        0xc0000080      /* extended features */
 #define        MSR_K8_UCODE_UPDATE     0xc0010020      /* update microcode */
+#define        MSR_MC0_CTL_MASK        0xc0010044
 
 /* VIA ACE crypto featureset: for via_feature_rng */
 #define        VIA_HAS_RNG             1       /* cpu has RNG */