On 2/4/2019 10:38 AM, Peter Zijlstra wrote:


This then? That's nearly what you had; except a lot less noisy.

--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -18,6 +18,7 @@
  #include <asm/hardirq.h>
  #include <asm/intel-family.h>
  #include <asm/apic.h>
+#include <asm/cpu_device_id.h>
#include "../perf_event.h" @@ -3206,16 +3207,27 @@ static struct perf_guest_switch_msr *int
        arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
        arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
        arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
-       /*
-        * If PMU counter has PEBS enabled it is not enough to disable counter
-        * on a guest entry since PEBS memory write can overshoot guest entry
-        * and corrupt guest memory. Disabling PEBS solves the problem.
-        */
-       arr[1].msr = MSR_IA32_PEBS_ENABLE;
-       arr[1].host = cpuc->pebs_enabled;
-       arr[1].guest = 0;
+       if (x86_pmu.flags & PMU_FL_PEBS_ALL)
+               arr[0].guest &= ~cpuc->pebs_enabled;
+       else
+               arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
+       *nr = 1;
+
+       if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) {
+               /*
+                * If PMU counter has PEBS enabled it is not enough to
+                * disable counter on a guest entry since PEBS memory
+                * write can overshoot guest entry and corrupt guest
+                * memory. Disabling PEBS solves the problem.
+                *
+                * Don't do this if the CPU already enforces it.
+                */
+               arr[1].msr = MSR_IA32_PEBS_ENABLE;
+               arr[1].host = cpuc->pebs_enabled;
+               arr[1].guest = 0;
+               *nr = 2;
+       }
- *nr = 2;
        return arr;
  }
@@ -3739,6 +3751,48 @@ static __init void intel_clovertown_quir
        x86_pmu.pebs_constraints = NULL;
  }
+static const struct x86_cpu_desc isolation_ucodes[] = {
+       INTEL_CPU_DESC(INTEL_FAM6_HASWELL_CORE,          3, 0x0000001f),
+       INTEL_CPU_DESC(INTEL_FAM6_HASWELL_ULT,           1, 0x0000001e),
+       INTEL_CPU_DESC(INTEL_FAM6_HASWELL_GT3E,          1, 0x00000015),
+       INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X,             2, 0x00000037),
+       INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X,             4, 0x0000000a),
+       INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_CORE,        4, 0x00000023),
+       INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_GT3E,        1, 0x00000014),
+       INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D,      2, 0x00000010),
+       INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D,      3, 0x07000009),
+       INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D,      4, 0x0f000009),
+       INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D,      5, 0x0e000002),
+       INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X,           2, 0x0b000014),
+       INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,             3, 0x00000021),
+       INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,             4, 0x00000000),
+       INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_MOBILE,        3, 0x0000007c),
+       INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_DESKTOP,       3, 0x0000007c),
+       INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP,      9, 0x0000004e),
+       INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE,       9, 0x0000004e),
+       INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE,      10, 0x0000004e),
+       INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE,      11, 0x0000004e),
+       INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE,      12, 0x0000004e),
+       INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP,     10, 0x0000004e),
+       INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP,     11, 0x0000004e),
+       INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP,     12, 0x0000004e),
+       INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP,     13, 0x0000004e),
+       INTEL_CPU_DESC(INTEL_FAM6_CANNONLAKE_MOBILE,     3, 0x00000000),
+       {}
+};
+
+static void intel_check_pebs_isolation(void)
+{
+       x86_pmu.pebs_no_isolation = 
!x86_cpu_has_min_microcode_rev(isolation_ucodes);
+}
+
+static __init void intel_pebs_isolation_quirk(void)
+{
+       WARN_ON_ONCE(x86_pmu.check_microcode);
+       x86_pmu.check_microcode = intel_check_pebs_isolation;
+       intel_check_pebs_isolation();
+}
+
  static int intel_snb_pebs_broken(int cpu)
  {
        u32 rev = UINT_MAX; /* default to broken for unknown models */
@@ -4433,6 +4487,7 @@ __init int intel_pmu_init(void)
        case INTEL_FAM6_HASWELL_ULT:
        case INTEL_FAM6_HASWELL_GT3E:
                x86_add_quirk(intel_ht_bug);
+               x86_add_quirk(intel_pebs_isolation_quirk);
                x86_pmu.late_ack = true;
                memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, 
sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, 
sizeof(hw_cache_extra_regs));
@@ -4464,6 +4519,7 @@ __init int intel_pmu_init(void)
        case INTEL_FAM6_BROADWELL_XEON_D:
        case INTEL_FAM6_BROADWELL_GT3E:
        case INTEL_FAM6_BROADWELL_X:
+               x86_add_quirk(intel_pebs_isolation_quirk);
                x86_pmu.late_ack = true;
                memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, 
sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, 
sizeof(hw_cache_extra_regs));
@@ -4526,6 +4582,7 @@ __init int intel_pmu_init(void)
        case INTEL_FAM6_SKYLAKE_X:
        case INTEL_FAM6_KABYLAKE_MOBILE:
        case INTEL_FAM6_KABYLAKE_DESKTOP:
+               x86_add_quirk(intel_pebs_isolation_quirk);
                x86_pmu.late_ack = true;
                memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, 
sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, 
sizeof(hw_cache_extra_regs));
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1628,6 +1628,7 @@ void __init intel_ds_init(void)
        x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
        x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
        x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
+       x86_pmu.pebs_no_isolation = 1;

We will submit the Icelake support soon (probably next week).
That will be a problem for Icelake.

Thanks,
Kan

        if (x86_pmu.pebs) {
                char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
                int format = x86_pmu.intel_cap.pebs_format;
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -601,13 +601,14 @@ struct x86_pmu {
        /*
         * Intel DebugStore bits
         */
-       unsigned int    bts             :1,
-                       bts_active      :1,
-                       pebs            :1,
-                       pebs_active     :1,
-                       pebs_broken     :1,
-                       pebs_prec_dist  :1,
-                       pebs_no_tlb     :1;
+       unsigned int    bts                     :1,
+                       bts_active              :1,
+                       pebs                    :1,
+                       pebs_active             :1,
+                       pebs_broken             :1,
+                       pebs_prec_dist          :1,
+                       pebs_no_tlb             :1,
+                       pebs_no_isolation       :1;
        int             pebs_record_size;
        int             pebs_buffer_size;
        void            (*drain_pebs)(struct pt_regs *regs);

Reply via email to