From: Jan Dakinevich <jan.dakinev...@virtuozzo.com>

Move LBR information from `struct x86_pmu' to a separate structure,
`struct x86_pmu_lbr'.
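Before the diff, here is a minimal standalone sketch of the pattern this
patch introduces (not kernel code: the types are trimmed down to a few
fields, and the MSR numbers are simply the values of the real MSR_LBR_*
constants). The LBR fields move into their own structure, and the
per-model init helpers fill a caller-supplied descriptor instead of
writing to the global directly:

#include <stdio.h>

struct x86_pmu_lbr {
        unsigned long tos, from, to;    /* MSR "base registers" */
        int nr;                         /* hardware stack size */
};

struct x86_pmu {
        struct x86_pmu_lbr lbr;         /* was: lbr_tos, lbr_from, ... */
};

static struct x86_pmu x86_pmu;          /* the single global, as in perf */

/* Before: intel_pmu_lbr_init_skl(void) wrote x86_pmu.lbr_* directly.
 * After: the caller chooses which descriptor the helper fills. */
static void intel_pmu_lbr_init_skl(struct x86_pmu_lbr *lbr)
{
        lbr->nr = 32;
        lbr->tos = 0x1c9;               /* MSR_LBR_TOS */
        lbr->from = 0x680;              /* MSR_LBR_NHM_FROM */
        lbr->to = 0x6c0;                /* MSR_LBR_NHM_TO */
}

int main(void)
{
        struct x86_pmu_lbr other_lbr;

        /* perf still passes its own descriptor... */
        intel_pmu_lbr_init_skl(&x86_pmu.lbr);

        /* ...and another consumer can fill its own descriptor without
         * touching perf's global state - the point of the split. */
        intel_pmu_lbr_init_skl(&other_lbr);

        printf("%d LBR entries, TOS MSR 0x%lx\n",
               x86_pmu.lbr.nr, x86_pmu.lbr.tos);
        return 0;
}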
LBR initialization is nailed to the perf subsystem and to the global
'boot_x86_pmu' structure. To reuse this code and keep the changes
readable, the work is split into two parts.

https://jira.sw.ru/browse/PSBM-75679

Signed-off-by: Jan Dakinevich <jan.dakinev...@virtuozzo.com>

This is commit 81325a25ac55 in the kernels from VZ7.

In commit 7b6f4449 ("Simplify perf/x86/intel: make reusable LBR
initialization code"), Vasily Averin also added a few #defines (like
#define lbr_nr lbr.nr) to keep more code unchanged and to simplify
future maintenance and kernel rebases. I added such defines here, in
the first patch, to make it less invasive. It is not possible, however,
to add such defines for the lbr.from and lbr.to fields: there would be
conflicts with the existing arrays lbr_from[] and lbr_to[].

Besides, the 4.18.0-x kernel supports more CPU models ("Icelake",
"Tigerlake", etc.). The calls to intel_pmu_lbr_init_*() for these
models are now handled too.

The original patch also removed the check for a specific Atom-related
erratum. I kept that check unchanged here, because it logically belongs
to the second patch ("perf/x86/intel: make LBR initialization
reusable"). Apart from that, there were mostly context changes.

Done in the scope of https://jira.sw.ru/browse/PSBM-127794.

Signed-off-by: Evgenii Shatokhin <eshatok...@virtuozzo.com>
---
 arch/x86/events/intel/core.c      |  38 +++++-----
 arch/x86/events/intel/lbr.c       | 120 +++++++++++++++---------
 arch/x86/events/perf_event.h      |  23 +++---
 arch/x86/include/asm/perf_event.h |  20 +++++
 4 files changed, 108 insertions(+), 93 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index fc55b91bdf39..e67d708f9140 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4083,9 +4083,9 @@ static void intel_snb_check_microcode(void)

 static bool is_lbr_from(unsigned long msr)
 {
-        unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr;
+        unsigned long lbr_from_nr = x86_pmu.lbr.from + x86_pmu.lbr_nr;

-        return x86_pmu.lbr_from <= msr && msr < lbr_from_nr;
+        return x86_pmu.lbr.from <= msr && msr < lbr_from_nr;
 }

 /*
@@ -4632,7 +4632,7 @@ __init int intel_pmu_init(void)
                 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));

-                intel_pmu_lbr_init_core();
+                intel_pmu_lbr_init_core(&x86_pmu.lbr);

                 x86_pmu.event_constraints = intel_core2_event_constraints;
                 x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
@@ -4648,7 +4648,7 @@ __init int intel_pmu_init(void)
                 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
                        sizeof(hw_cache_extra_regs));

-                intel_pmu_lbr_init_nhm();
+                intel_pmu_lbr_init_nhm(&x86_pmu.lbr);

                 x86_pmu.event_constraints = intel_nehalem_event_constraints;
                 x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
@@ -4682,7 +4682,7 @@ __init int intel_pmu_init(void)
                 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));

-                intel_pmu_lbr_init_atom();
+                intel_pmu_lbr_init_atom(&x86_pmu.lbr);

                 x86_pmu.event_constraints = intel_gen_event_constraints;
                 x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
@@ -4701,7 +4701,7 @@ __init int intel_pmu_init(void)
                 memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
                        sizeof(hw_cache_extra_regs));

-                intel_pmu_lbr_init_slm();
+                intel_pmu_lbr_init_slm(&x86_pmu.lbr);

                 x86_pmu.event_constraints = intel_slm_event_constraints;
                 x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
@@ -4721,7 +4721,7 @@ __init int intel_pmu_init(void)
                 memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
                        sizeof(hw_cache_extra_regs));

-                intel_pmu_lbr_init_skl();
+                intel_pmu_lbr_init_skl(&x86_pmu.lbr);

                 x86_pmu.event_constraints = intel_slm_event_constraints;
                 x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints;
@@ -4748,7 +4748,7 @@ __init int intel_pmu_init(void)
                 memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
                        sizeof(hw_cache_extra_regs));

-                intel_pmu_lbr_init_skl();
+                intel_pmu_lbr_init_skl(&x86_pmu.lbr);

                 x86_pmu.event_constraints = intel_slm_event_constraints;
                 x86_pmu.extra_regs = intel_glm_extra_regs;
@@ -4779,7 +4779,7 @@ __init int intel_pmu_init(void)
                        sizeof(hw_cache_extra_regs));
                 hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;

-                intel_pmu_lbr_init_skl();
+                intel_pmu_lbr_init_skl(&x86_pmu.lbr);

                 x86_pmu.event_constraints = intel_slm_event_constraints;
                 x86_pmu.extra_regs = intel_tnt_extra_regs;
@@ -4805,7 +4805,7 @@ __init int intel_pmu_init(void)
                 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
                        sizeof(hw_cache_extra_regs));

-                intel_pmu_lbr_init_nhm();
+                intel_pmu_lbr_init_nhm(&x86_pmu.lbr);

                 x86_pmu.event_constraints = intel_westmere_event_constraints;
                 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
@@ -4837,7 +4837,7 @@ __init int intel_pmu_init(void)
                 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
                        sizeof(hw_cache_extra_regs));

-                intel_pmu_lbr_init_snb();
+                intel_pmu_lbr_init_snb(&x86_pmu.lbr);

                 x86_pmu.event_constraints = intel_snb_event_constraints;
                 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
@@ -4879,7 +4879,7 @@ __init int intel_pmu_init(void)
                 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
                        sizeof(hw_cache_extra_regs));

-                intel_pmu_lbr_init_snb();
+                intel_pmu_lbr_init_snb(&x86_pmu.lbr);

                 x86_pmu.event_constraints = intel_ivb_event_constraints;
                 x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
@@ -4917,7 +4917,7 @@ __init int intel_pmu_init(void)
                 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                 memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));

-                intel_pmu_lbr_init_hsw();
+                intel_pmu_lbr_init_hsw(&x86_pmu.lbr);

                 x86_pmu.event_constraints = intel_hsw_event_constraints;
                 x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
@@ -4959,7 +4959,7 @@ __init int intel_pmu_init(void)
                 hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
                                                                               BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;

-                intel_pmu_lbr_init_hsw();
+                intel_pmu_lbr_init_hsw(&x86_pmu.lbr);

                 x86_pmu.event_constraints = intel_bdw_event_constraints;
                 x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints;
@@ -4988,7 +4988,7 @@ __init int intel_pmu_init(void)
                        slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                 memcpy(hw_cache_extra_regs,
                        knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));

-                intel_pmu_lbr_init_knl();
+                intel_pmu_lbr_init_knl(&x86_pmu.lbr);

                 x86_pmu.event_constraints = intel_slm_event_constraints;
                 x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
@@ -5015,7 +5015,7 @@ __init int intel_pmu_init(void)
                 x86_pmu.late_ack = true;
                 memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                 memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
-                intel_pmu_lbr_init_skl();
+                intel_pmu_lbr_init_skl(&x86_pmu.lbr);

                 /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */
                 event_attr_td_recovery_bubbles.event_str_noht =
@@ -5065,7 +5065,7 @@ __init int intel_pmu_init(void)
                 memcpy(hw_cache_event_ids, skl_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
                 memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs,
                        sizeof(hw_cache_extra_regs));
                 hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;

-                intel_pmu_lbr_init_skl();
+                intel_pmu_lbr_init_skl(&x86_pmu.lbr);

                 x86_pmu.event_constraints = intel_icl_event_constraints;
                 x86_pmu.pebs_constraints = intel_icl_pebs_event_constraints;
@@ -5159,8 +5159,8 @@ __init int intel_pmu_init(void)
         if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
                 x86_pmu.lbr_nr = 0;
         for (i = 0; i < x86_pmu.lbr_nr; i++) {
-                if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
-                      check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
+                if (!(check_msr(x86_pmu.lbr.from + i, 0xffffUL) &&
+                      check_msr(x86_pmu.lbr.to + i, 0xffffUL)))
                         x86_pmu.lbr_nr = 0;
         }

diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 65113b16804a..ea971fd767af 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -199,7 +199,7 @@ static void intel_pmu_lbr_reset_32(void)
         int i;

         for (i = 0; i < x86_pmu.lbr_nr; i++)
-                wrmsrl(x86_pmu.lbr_from + i, 0);
+                wrmsrl(x86_pmu.lbr.from + i, 0);
 }

 static void intel_pmu_lbr_reset_64(void)
@@ -207,8 +207,8 @@ static void intel_pmu_lbr_reset_64(void)
         int i;

         for (i = 0; i < x86_pmu.lbr_nr; i++) {
-                wrmsrl(x86_pmu.lbr_from + i, 0);
-                wrmsrl(x86_pmu.lbr_to + i, 0);
+                wrmsrl(x86_pmu.lbr.from + i, 0);
+                wrmsrl(x86_pmu.lbr.to + i, 0);
                 if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
                         wrmsrl(MSR_LBR_INFO_0 + i, 0);
         }
@@ -311,19 +311,19 @@ static u64 lbr_from_signext_quirk_rd(u64 val)
 static inline void wrlbr_from(unsigned int idx, u64 val)
 {
         val = lbr_from_signext_quirk_wr(val);
-        wrmsrl(x86_pmu.lbr_from + idx, val);
+        wrmsrl(x86_pmu.lbr.from + idx, val);
 }

 static inline void wrlbr_to(unsigned int idx, u64 val)
 {
-        wrmsrl(x86_pmu.lbr_to + idx, val);
+        wrmsrl(x86_pmu.lbr.to + idx, val);
 }

 static inline u64 rdlbr_from(unsigned int idx)
 {
         u64 val;

-        rdmsrl(x86_pmu.lbr_from + idx, val);
+        rdmsrl(x86_pmu.lbr.from + idx, val);

         return lbr_from_signext_quirk_rd(val);
 }
@@ -332,7 +332,7 @@ static inline u64 rdlbr_to(unsigned int idx)
 {
         u64 val;

-        rdmsrl(x86_pmu.lbr_to + idx, val);
+        rdmsrl(x86_pmu.lbr.to + idx, val);

         return val;
 }
@@ -572,7 +572,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
                         u64 lbr;
                 } msr_lastbranch;

-                rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
+                rdmsrl(x86_pmu.lbr.from + lbr_idx, msr_lastbranch.lbr);

                 cpuc->lbr_entries[i].from = msr_lastbranch.from;
                 cpuc->lbr_entries[i].to = msr_lastbranch.to;
@@ -1199,12 +1199,12 @@ static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
 };

 /* core */
-void __init intel_pmu_lbr_init_core(void)
+void __init intel_pmu_lbr_init_core(struct x86_pmu_lbr *lbr)
 {
-        x86_pmu.lbr_nr = 4;
-        x86_pmu.lbr_tos = MSR_LBR_TOS;
-        x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
-        x86_pmu.lbr_to = MSR_LBR_CORE_TO;
+        lbr->nr = 4;
+        lbr->tos = MSR_LBR_TOS;
+        lbr->from = MSR_LBR_CORE_FROM;
+        lbr->to = MSR_LBR_CORE_TO;

         /*
          * SW branch filter usage:
@@ -1213,15 +1213,15 @@ void __init intel_pmu_lbr_init_core(void)
 }

 /* nehalem/westmere */
-void __init intel_pmu_lbr_init_nhm(void)
+void __init intel_pmu_lbr_init_nhm(struct x86_pmu_lbr *lbr)
 {
-        x86_pmu.lbr_nr = 16;
-        x86_pmu.lbr_tos = MSR_LBR_TOS;
-        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
-        x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+        lbr->nr = 16;
+        lbr->tos = MSR_LBR_TOS;
+        lbr->from = MSR_LBR_NHM_FROM;
+        lbr->to = MSR_LBR_NHM_TO;

-        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-        x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
+        lbr->sel_mask = LBR_SEL_MASK;
+        lbr->sel_map = nhm_lbr_sel_map;

         /*
          * SW branch filter usage:
@@ -1233,15 +1233,15 @@ void __init intel_pmu_lbr_init_nhm(void)
 }

 /* sandy bridge */
-void __init intel_pmu_lbr_init_snb(void)
+void __init intel_pmu_lbr_init_snb(struct x86_pmu_lbr *lbr)
 {
-        x86_pmu.lbr_nr = 16;
-        x86_pmu.lbr_tos = MSR_LBR_TOS;
-        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
-        x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+        lbr->nr = 16;
+        lbr->tos = MSR_LBR_TOS;
+        lbr->from = MSR_LBR_NHM_FROM;
+        lbr->to = MSR_LBR_NHM_TO;

-        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-        x86_pmu.lbr_sel_map = snb_lbr_sel_map;
+        lbr->sel_mask = LBR_SEL_MASK;
+        lbr->sel_map = snb_lbr_sel_map;

         /*
          * SW branch filter usage:
@@ -1252,30 +1252,30 @@ void __init intel_pmu_lbr_init_snb(void)
 }

 /* haswell */
-void intel_pmu_lbr_init_hsw(void)
+void intel_pmu_lbr_init_hsw(struct x86_pmu_lbr *lbr)
 {
-        x86_pmu.lbr_nr = 16;
-        x86_pmu.lbr_tos = MSR_LBR_TOS;
-        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
-        x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+        lbr->nr = 16;
+        lbr->tos = MSR_LBR_TOS;
+        lbr->from = MSR_LBR_NHM_FROM;
+        lbr->to = MSR_LBR_NHM_TO;

-        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-        x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
+        lbr->sel_mask = LBR_SEL_MASK;
+        lbr->sel_map = hsw_lbr_sel_map;

         if (lbr_from_signext_quirk_needed())
                 static_branch_enable(&lbr_from_quirk_key);
 }

 /* skylake */
-__init void intel_pmu_lbr_init_skl(void)
+__init void intel_pmu_lbr_init_skl(struct x86_pmu_lbr *lbr)
 {
-        x86_pmu.lbr_nr = 32;
-        x86_pmu.lbr_tos = MSR_LBR_TOS;
-        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
-        x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+        lbr->nr = 32;
+        lbr->tos = MSR_LBR_TOS;
+        lbr->from = MSR_LBR_NHM_FROM;
+        lbr->to = MSR_LBR_NHM_TO;

-        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-        x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
+        lbr->sel_mask = LBR_SEL_MASK;
+        lbr->sel_map = hsw_lbr_sel_map;

         /*
          * SW branch filter usage:
@@ -1286,7 +1286,7 @@ __init void intel_pmu_lbr_init_skl(void)
 }

 /* atom */
-void __init intel_pmu_lbr_init_atom(void)
+void __init intel_pmu_lbr_init_atom(struct x86_pmu_lbr *lbr)
 {
         /*
          * only models starting at stepping 10 seems
@@ -1299,10 +1299,10 @@ void __init intel_pmu_lbr_init_atom(void)
                 return;
         }

-        x86_pmu.lbr_nr = 8;
-        x86_pmu.lbr_tos = MSR_LBR_TOS;
-        x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
-        x86_pmu.lbr_to = MSR_LBR_CORE_TO;
+        lbr->nr = 8;
+        lbr->tos = MSR_LBR_TOS;
+        lbr->from = MSR_LBR_CORE_FROM;
+        lbr->to = MSR_LBR_CORE_TO;

         /*
          * SW branch filter usage:
@@ -1311,15 +1311,15 @@ void __init intel_pmu_lbr_init_atom(void)
 }

 /* slm */
-void __init intel_pmu_lbr_init_slm(void)
+void __init intel_pmu_lbr_init_slm(struct x86_pmu_lbr *lbr)
 {
-        x86_pmu.lbr_nr = 8;
-        x86_pmu.lbr_tos = MSR_LBR_TOS;
-        x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
-        x86_pmu.lbr_to = MSR_LBR_CORE_TO;
+        lbr->nr = 8;
+        lbr->tos = MSR_LBR_TOS;
+        lbr->from = MSR_LBR_CORE_FROM;
+        lbr->to = MSR_LBR_CORE_TO;

-        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-        x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
+        lbr->sel_mask = LBR_SEL_MASK;
+        lbr->sel_map = nhm_lbr_sel_map;

         /*
          * SW branch filter usage:
@@ -1329,15 +1329,15 @@ void __init intel_pmu_lbr_init_slm(void)
 }

 /* Knights Landing */
-void intel_pmu_lbr_init_knl(void)
+void intel_pmu_lbr_init_knl(struct x86_pmu_lbr *lbr)
 {
-        x86_pmu.lbr_nr = 8;
-        x86_pmu.lbr_tos = MSR_LBR_TOS;
-        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
-        x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+        lbr->nr = 8;
+        lbr->tos = MSR_LBR_TOS;
+        lbr->from = MSR_LBR_NHM_FROM;
+        lbr->to = MSR_LBR_NHM_TO;

-        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-        x86_pmu.lbr_sel_map = snb_lbr_sel_map;
+        lbr->sel_mask = LBR_SEL_MASK;
+        lbr->sel_map = snb_lbr_sel_map;

         /* Knights Landing does have MISPREDICT bit */
         if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index f1cd1ca1a77b..b48cf9ed405c 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -672,12 +672,7 @@ struct x86_pmu {
         /*
          * Intel LBR
          */
-        unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
-        int lbr_nr;                              /* hardware stack size */
-        u64 lbr_sel_mask;                        /* LBR_SELECT valid bits */
-        const int *lbr_sel_map;                  /* lbr_select mappings */
-        bool lbr_double_abort;                   /* duplicated lbr aborts */
-        bool lbr_pt_coexist;                     /* (LBR|BTS) may coexist with PT */
+        struct x86_pmu_lbr lbr;

         /*
          * Intel PT/LBR/BTS are exclusive
@@ -1063,21 +1058,21 @@ void intel_pmu_lbr_disable_all(void);

 void intel_pmu_lbr_read(void);

-void intel_pmu_lbr_init_core(void);
+void intel_pmu_lbr_init_core(struct x86_pmu_lbr *lbr);

-void intel_pmu_lbr_init_nhm(void);
+void intel_pmu_lbr_init_nhm(struct x86_pmu_lbr *lbr);

-void intel_pmu_lbr_init_atom(void);
+void intel_pmu_lbr_init_atom(struct x86_pmu_lbr *lbr);

-void intel_pmu_lbr_init_slm(void);
+void intel_pmu_lbr_init_slm(struct x86_pmu_lbr *lbr);

-void intel_pmu_lbr_init_snb(void);
+void intel_pmu_lbr_init_snb(struct x86_pmu_lbr *lbr);

-void intel_pmu_lbr_init_hsw(void);
+void intel_pmu_lbr_init_hsw(struct x86_pmu_lbr *lbr);

-void intel_pmu_lbr_init_skl(void);
+void intel_pmu_lbr_init_skl(struct x86_pmu_lbr *lbr);

-void intel_pmu_lbr_init_knl(void);
+void intel_pmu_lbr_init_knl(struct x86_pmu_lbr *lbr);

 void intel_pmu_pebs_data_source_nhm(void);

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index c5b3c137ca8c..721f4f2034c5 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -152,6 +152,26 @@ struct x86_pmu_capability {
         int events_mask_len;
 };

+struct x86_pmu_lbr {
+        unsigned long tos, from, to; /* MSR base regs */
+        int nr;                      /* hardware stack size */
+        u64 sel_mask;                /* LBR_SELECT valid bits */
+        const int *sel_map;          /* lbr_select mappings */
+        bool double_abort;           /* duplicated lbr aborts */
+        bool pt_coexist;             /* (LBR|BTS) may coexist with PT */
+};
+
+/*
+ * Convenience defines to keep more of the code unchanged - it makes it
+ * easier to rebase the kernel to newer versions.
+ */
+#define lbr_tos lbr.tos
+#define lbr_nr lbr.nr
+#define lbr_sel_mask lbr.sel_mask
+#define lbr_sel_map lbr.sel_map
+#define lbr_double_abort lbr.double_abort
+#define lbr_pt_coexist lbr.pt_coexist
+
 /*
  * Fixed-purpose performance events:
  */
-- 
2.27.0
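As a small aside for reviewers, the following standalone snippet (again
not kernel code, just an illustration under simplified types) shows how
the compatibility defines above work, and why no such define is possible
for lbr.from and lbr.to - the preprocessor rewrites the token everywhere,
so it would also clobber the unrelated lbr_from[]/lbr_to[] arrays
mentioned in the description:

#include <stdio.h>

struct x86_pmu_lbr {
        unsigned long from, to;
        int nr;
};

/* Same trick as in asm/perf_event.h above: the old spelling
 * x86_pmu.lbr_nr now compiles as x86_pmu.lbr.nr. */
#define lbr_nr lbr.nr

struct x86_pmu {
        struct x86_pmu_lbr lbr;
};

static struct x86_pmu x86_pmu = { .lbr = { .nr = 32 } };

/* A hypothetical "#define lbr_from lbr.from" would rewrite the
 * identifier below into the nonsensical "lbr.from[32]", which is why
 * the from/to fields get no compatibility defines. */
static unsigned long lbr_from[32];

int main(void)
{
        x86_pmu.lbr_nr = 16;                 /* old spelling still works... */
        printf("nr = %d\n", x86_pmu.lbr.nr); /* ...it is the same field */

        lbr_from[0] = 0;                     /* fine only while no alias exists */
        return 0;
}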