hi,
when booting under VMware we get the following dmesg lines:

[    0.051567] perf_event_intel: CPUID marked event: 'cpu cycles' unavailable
[    0.051567] perf_event_intel: CPUID marked event: 'instructions' unavailable
[    0.051568] perf_event_intel: CPUID marked event: 'bus cycles' unavailable
[    0.051568] perf_event_intel: CPUID marked event: 'cache references' unavailable
[    0.051569] perf_event_intel: CPUID marked event: 'cache misses' unavailable
[    0.051570] perf_event_intel: CPUID marked event: 'branch instructions' unavailable
[    0.051570] perf_event_intel: CPUID marked event: 'branch misses' unavailable

that means all the architectural events are reported as unavailable by CPUID(0xa)
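
(as a side note, here's a minimal userspace sketch of that CPUID check --
leaf 0xa reports the architectural events in EBX, where a *set* bit means
the event is not available; this is just an illustration, not part of the
patch:)

  /* dump the CPUID(0xa) "event not available" bits */
  #include <stdio.h>
  #include <cpuid.h>

  int main(void)
  {
          unsigned int eax, ebx, ecx, edx;

          if (!__get_cpuid(0xa, &eax, &ebx, &ecx, &edx))
                  return 1;
          /* EAX[7:0] = arch perfmon version; EBX bit 0 = cpu cycles,
             bit 1 = instructions, ... (bit set == unavailable) */
          printf("version %u, unavailable mask 0x%x\n", eax & 0xff, ebx);
          return 0;
  }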

The kernel code clears the corresponding intel_perfmon_event_map
entries to prevent those events from being configured through the
PERF_TYPE_HARDWARE pmu type. However, they can still be configured
via the PERF_TYPE_RAW type.
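
(for example, something like the following opens fine, and it's the
kernel-side counter read that then traps -- a rough reproducer sketch,
not tested under VMware; 0x003c is the architectural cpu-cycles raw
encoding:)

  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/perf_event.h>

  int main(void)
  {
          struct perf_event_attr attr;
          unsigned long long count;
          int fd;

          memset(&attr, 0, sizeof(attr));
          attr.size           = sizeof(attr);
          attr.type           = PERF_TYPE_RAW;  /* bypasses the event_map check */
          attr.config         = 0x003c;         /* event 0x3c, umask 0x00 */
          attr.exclude_kernel = 1;

          fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
          if (fd < 0) {
                  perror("perf_event_open");
                  return 1;
          }
          /* read() ends up in x86_perf_event_update()/rdpmc, as in the trace below */
          read(fd, &count, sizeof(count));
          printf("cycles: %llu\n", count);
          return 0;
  }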

We're getting a GP fault on VMware when reading the cycles PMC
configured through the PERF_TYPE_RAW interface:

 #4 [ffff88007c603e10] do_general_protection at ffffffff8163da9e
 #5 [ffff88007c603e40] general_protection at ffffffff8163d3a8
    [exception RIP: native_read_pmc+6]
    RIP: ffffffff81058d66  RSP: ffff88007c603ef0  RFLAGS: 00010083
    RAX: ffffffff81957ee0  RBX: 0000000000000000  RCX: 0000000040000002
    RDX: 000000000ff8f719  RSI: ffff88007c617fa8  RDI: 0000000040000002
    RBP: ffff88007c603ef0   R8: 00007ffde5053150   R9: 0000000000000000
    R10: 00007ffde5052530  R11: 00007fbb22aedc70  R12: ffffffff80000001
    R13: ffff880079b74400  R14: ffff880079b74578  R15: 0000000000000010
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0000
 #6 [ffff88007c603ef8] x86_perf_event_update at ffffffff81029e03
 #7 [ffff88007c603f30] x86_pmu_read at ffffffff8102a079
 #8 [ffff88007c603f40] __perf_event_read at ffffffff811590de

I couldn't find out what rdpmc does in this situation on real HW,
so I'm not sure if we actually want to prevent this.. the patch
below tries to catch this case.

thanks,
jirka


---
 arch/x86/events/core.c       |  8 ++++-
 arch/x86/events/intel/core.c | 72 ++++++++++++++++++++++++++++++++------------
 arch/x86/events/perf_event.h |  6 ++++
 3 files changed, 65 insertions(+), 21 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 473519100b11..d836c5922b12 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -534,8 +534,14 @@ int x86_pmu_hw_config(struct perf_event *event)
        if (!event->attr.exclude_kernel)
                event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
 
-       if (event->attr.type == PERF_TYPE_RAW)
+       if (event->attr.type == PERF_TYPE_RAW) {
+               u64 arch_config = event->attr.config & INTEL_ARCH_EVENT_MASK;
+
+               if (x86_pmu_event_disabled(arch_config))
+                       return -ENOENT;
+
                event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
+       }
 
        if (event->attr.sample_period && x86_pmu.limit_period) {
                if (x86_pmu.limit_period(event, event->attr.sample_period) >
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 9049d62f34ae..99a83529c7ff 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -23,16 +23,22 @@
 /*
  * Intel PerfMon, used on Core and later.
  */
-static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
-{
-       [PERF_COUNT_HW_CPU_CYCLES]              = 0x003c,
-       [PERF_COUNT_HW_INSTRUCTIONS]            = 0x00c0,
-       [PERF_COUNT_HW_CACHE_REFERENCES]        = 0x4f2e,
-       [PERF_COUNT_HW_CACHE_MISSES]            = 0x412e,
-       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = 0x00c4,
-       [PERF_COUNT_HW_BRANCH_MISSES]           = 0x00c5,
-       [PERF_COUNT_HW_BUS_CYCLES]              = 0x013c,
-       [PERF_COUNT_HW_REF_CPU_CYCLES]          = 0x0300, /* pseudo-encoding */
+struct intel_perfmon_event {
+       u64     config;
+       bool    disabled;
+       u64     replacement;
+};
+
+static struct intel_perfmon_event intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
+{
+       [PERF_COUNT_HW_CPU_CYCLES]              = { .config = 0x003c },
+       [PERF_COUNT_HW_INSTRUCTIONS]            = { .config = 0x00c0 },
+       [PERF_COUNT_HW_CACHE_REFERENCES]        = { .config = 0x4f2e },
+       [PERF_COUNT_HW_CACHE_MISSES]            = { .config = 0x412e },
+       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = { .config = 0x00c4 },
+       [PERF_COUNT_HW_BRANCH_MISSES]           = { .config = 0x00c5 },
+       [PERF_COUNT_HW_BUS_CYCLES]              = { .config = 0x013c },
+       [PERF_COUNT_HW_REF_CPU_CYCLES]          = { .config = 0x0300 }, /* pseudo-encoding */
 };
 
 static struct event_constraint intel_core_event_constraints[] __read_mostly =
@@ -268,7 +274,31 @@ struct event_constraint intel_bdw_event_constraints[] = {
 
 static u64 intel_pmu_event_map(int hw_event)
 {
-       return intel_perfmon_event_map[hw_event];
+       struct intel_perfmon_event *event = &intel_perfmon_event_map[hw_event];
+
+       if (event->disabled)
+               return 0;
+       if (event->replacement)
+               return event->replacement;
+
+       return event->config;
+}
+
+static bool intel_pmu_event_disabled(int hw_event)
+{
+       unsigned i;
+
+       for (i = 0; i < ARRAY_SIZE(intel_perfmon_event_map); i++) {
+               struct intel_perfmon_event *event = &intel_perfmon_event_map[i];
+
+               if (event->config != hw_event)
+                       continue;
+
+               if (event->disabled)
+                       return true;
+       }
+
+       return false;
 }
 
 /*
@@ -3165,6 +3195,7 @@ static __initconst const struct x86_pmu core_pmu = {
        .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
        .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
        .event_map              = intel_pmu_event_map,
+       .event_disabled         = intel_pmu_event_disabled,
        .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
        .apic                   = 1,
        .free_running_flags     = PEBS_FREERUNNING_FLAGS,
@@ -3205,6 +3236,7 @@ static __initconst const struct x86_pmu intel_pmu = {
        .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
        .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
        .event_map              = intel_pmu_event_map,
+       .event_disabled         = intel_pmu_event_disabled,
        .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
        .apic                   = 1,
        .free_running_flags     = PEBS_FREERUNNING_FLAGS,
@@ -3357,7 +3389,7 @@ static __init void intel_arch_events_quirk(void)
 
        /* disable event that reported as not presend by cpuid */
        for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
-               intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
+               intel_perfmon_event_map[intel_arch_events_map[bit].id].disabled = true;
                pr_warn("CPUID marked event: \'%s\' unavailable\n",
                        intel_arch_events_map[bit].name);
        }
@@ -3375,7 +3407,7 @@ static __init void intel_nehalem_quirk(void)
                 * branch-misses, but it's still much better than the
                 * architectural event which is often completely bogus:
                 */
-               intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+               intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES].replacement = 0x7f89;
                ebx.split.no_branch_misses_retired = 0;
                x86_pmu.events_maskl = ebx.full;
                pr_info("CPU erratum AAJ80 worked around\n");
@@ -3543,10 +3575,10 @@ __init int intel_pmu_init(void)
                x86_pmu.cpu_events = nhm_events_attrs;
 
                /* UOPS_ISSUED.STALLED_CYCLES */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND].replacement =
                        X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
                /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND].replacement =
                        X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
 
                intel_pmu_pebs_data_source_nhm();
@@ -3630,10 +3662,10 @@ __init int intel_pmu_init(void)
                x86_pmu.cpu_events = nhm_events_attrs;
 
                /* UOPS_ISSUED.STALLED_CYCLES */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND].replacement =
                        X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
                /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND].replacement =
                        X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
 
                intel_pmu_pebs_data_source_nhm();
@@ -3667,10 +3699,10 @@ __init int intel_pmu_init(void)
                x86_pmu.cpu_events = snb_events_attrs;
 
                /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND].replacement =
                        X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
                /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND].replacement =
                        X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
 
                pr_cont("SandyBridge events, ");
@@ -3704,7 +3736,7 @@ __init int intel_pmu_init(void)
                x86_pmu.cpu_events = snb_events_attrs;
 
                /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND].replacement =
                        X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
 
                pr_cont("IvyBridge events, ");
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 01ddfeadaee6..69cca7dc8de4 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -514,6 +514,7 @@ struct x86_pmu {
        int             (*addr_offset)(int index, bool eventsel);
        int             (*rdpmc_index)(int index);
        u64             (*event_map)(int);
+       bool            (*event_disabled)(int);
        int             max_events;
        int             num_counters;
        int             num_counters_fixed;
@@ -715,6 +716,11 @@ static inline int x86_pmu_rdpmc_index(int index)
        return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
 }
 
+static inline bool x86_pmu_event_disabled(u64 config)
+{
+       return x86_pmu.event_disabled ? x86_pmu.event_disabled(config) : false;
+}
+
 int x86_add_exclusive(unsigned int what);
 
 void x86_del_exclusive(unsigned int what);
-- 
2.7.4
