From: Kan Liang <kan.li...@linux.intel.com>

With large PEBS, the physical addresses of the last several samples are
always lost (reported as 0). For example,
 #perf record -e mem-loads:uP --phys-data -c10000 -- ./dtlb
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.103 MB perf.data (2661 samples) ]
 #perf script -Ftime,event,sym,phys_addr  | tail
  1595.162483: mem-loads:uP: DoDependentLoads 0
  1595.162484: mem-loads:uP: DoDependentLoads 0
  1595.162484: mem-loads:uP: DoDependentLoads 0
  1595.162485: mem-loads:uP: DoDependentLoads 0

The problem happens because the mapping has already been removed before
the page table is walked.
To avoid this, drain the PEBS buffer on munmap.

With the patch,
 #perf script -Ftime,event,sym,phys_addr  | tail
  190.425922: mem-loads:uP: DoDependentLoads 3ce180a80
  190.425922: mem-loads:uP: DoDependentLoads 3c59ef540
  190.425922: mem-loads:uP: DoDependentLoads 3e3c73dc0
  190.425923: mem-loads:uP: DoDependentLoads 3d6c0d440

Fixes: fc7ce9c74c3a ("perf/core, x86: Add PERF_SAMPLE_PHYS_ADDR")
Signed-off-by: Kan Liang <kan.li...@linux.intel.com>
---
 arch/x86/events/core.c       | 7 +++++++
 arch/x86/events/intel/core.c | 6 ++++++
 arch/x86/events/intel/ds.c   | 8 ++++++++
 arch/x86/events/perf_event.h | 4 +++-
 4 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index de32741d041a..9b23b49a0778 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2288,6 +2288,12 @@ static void x86_pmu_sched_task(struct perf_event_context 
*ctx, bool sched_in)
                x86_pmu.sched_task(ctx, sched_in);
 }
 
+static void x86_pmu_munmap(void)
+{
+       if (x86_pmu.munmap)
+               x86_pmu.munmap();
+}
+
 void perf_check_microcode(void)
 {
        if (x86_pmu.check_microcode)
@@ -2317,6 +2323,7 @@ static struct pmu pmu = {
 
        .event_idx              = x86_pmu_event_idx,
        .sched_task             = x86_pmu_sched_task,
+       .munmap                 = x86_pmu_munmap,
        .task_ctx_size          = sizeof(struct x86_perf_task_context),
 };
 
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 0fb8659b20d8..db393f6b3f53 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3548,6 +3548,11 @@ static void intel_pmu_sched_task(struct 
perf_event_context *ctx,
        intel_pmu_lbr_sched_task(ctx, sched_in);
 }
 
+static void intel_pmu_munmap(void)
+{
+       intel_pmu_pebs_munmap();
+}
+
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
 
 PMU_FORMAT_ATTR(ldlat, "config1:0-15");
@@ -3669,6 +3674,7 @@ static __initconst const struct x86_pmu intel_pmu = {
        .cpu_dying              = intel_pmu_cpu_dying,
        .guest_get_msrs         = intel_guest_get_msrs,
        .sched_task             = intel_pmu_sched_task,
+       .munmap                 = intel_pmu_munmap,
 };
 
 static __init void intel_clovertown_quirk(void)
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index b7b01d762d32..a0ca0e7c005c 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -893,6 +893,14 @@ void intel_pmu_pebs_sched_task(struct perf_event_context 
*ctx, bool sched_in)
                intel_pmu_drain_pebs_buffer();
 }
 
+void intel_pmu_pebs_munmap(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (pebs_needs_sched_cb(cpuc))
+               intel_pmu_drain_pebs_buffer();
+}
+
 static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
 {
        struct debug_store *ds = cpuc->ds;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index adae087cecdd..e1a8b8b928e8 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -591,7 +591,7 @@ struct x86_pmu {
        void            (*check_microcode)(void);
        void            (*sched_task)(struct perf_event_context *ctx,
                                      bool sched_in);
-
+       void            (*munmap)(void);
        /*
         * Intel Arch Perfmon v2+
         */
@@ -932,6 +932,8 @@ void intel_pmu_pebs_disable_all(void);
 
 void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
 
+void intel_pmu_pebs_munmap(void);
+
 void intel_pmu_auto_reload_read(struct perf_event *event);
 
 void intel_ds_init(void);
-- 
2.17.1

Reply via email to