On Mon, Mar 24, 2025 at 05:30:45PM +0000, Mingwei Zhang wrote:
> From: Kan Liang <kan.li...@linux.intel.com>
> 
> Only KVM knows the exact time when a guest is entering/exiting. Expose
> two interfaces to KVM to switch the ownership of the PMU resources.
> 
> All the pinned events must be scheduled in first. Extend the
> perf_event_sched_in() helper to support extra flag, e.g., EVENT_GUEST.
> 
> Signed-off-by: Kan Liang <kan.li...@linux.intel.com>
> Signed-off-by: Mingwei Zhang <mizh...@google.com>
> ---
>  include/linux/perf_event.h |  4 ++
>  kernel/events/core.c       | 80 ++++++++++++++++++++++++++++++++++----
>  2 files changed, 77 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 7bda1e20be12..37187ee8e226 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1822,6 +1822,8 @@ extern int perf_event_period(struct perf_event *event, u64 value);
>  extern u64 perf_event_pause(struct perf_event *event, bool reset);
>  int perf_get_mediated_pmu(void);
>  void perf_put_mediated_pmu(void);
> +void perf_guest_enter(void);
> +void perf_guest_exit(void);
>  #else /* !CONFIG_PERF_EVENTS: */
>  static inline void *
>  perf_aux_output_begin(struct perf_output_handle *handle,
> @@ -1919,6 +1921,8 @@ static inline int perf_get_mediated_pmu(void)
>  }
>  
>  static inline void perf_put_mediated_pmu(void)	{ }
> +static inline void perf_guest_enter(void)	{ }
> +static inline void perf_guest_exit(void)	{ }
>  #endif
>  
>  #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 7a2115b2c5c1..d05487d465c9 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -2827,14 +2827,15 @@ static void task_ctx_sched_out(struct perf_event_context *ctx,
>  
>  static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
>  				struct perf_event_context *ctx,
> -				struct pmu *pmu)
> +				struct pmu *pmu,
> +				enum event_type_t event_type)
>  {
> -	ctx_sched_in(&cpuctx->ctx, pmu, EVENT_PINNED);
> +	ctx_sched_in(&cpuctx->ctx, pmu, EVENT_PINNED | event_type);
>  	if (ctx)
> -		ctx_sched_in(ctx, pmu, EVENT_PINNED);
> -	ctx_sched_in(&cpuctx->ctx, pmu, EVENT_FLEXIBLE);
> +		ctx_sched_in(ctx, pmu, EVENT_PINNED | event_type);
> +	ctx_sched_in(&cpuctx->ctx, pmu, EVENT_FLEXIBLE | event_type);
>  	if (ctx)
> -		ctx_sched_in(ctx, pmu, EVENT_FLEXIBLE);
> +		ctx_sched_in(ctx, pmu, EVENT_FLEXIBLE | event_type);
>  }
>  
>  /*
> @@ -2890,7 +2891,7 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
>  	else if (event_type & EVENT_PINNED)
>  		ctx_sched_out(&cpuctx->ctx, pmu, EVENT_FLEXIBLE);
>  
> -	perf_event_sched_in(cpuctx, task_ctx, pmu);
> +	perf_event_sched_in(cpuctx, task_ctx, pmu, 0);
>  
>  	for_each_epc(epc, &cpuctx->ctx, pmu, 0)
>  		perf_pmu_enable(epc->pmu);
> @@ -4188,7 +4189,7 @@ static void perf_event_context_sched_in(struct task_struct *task)
>  		ctx_sched_out(&cpuctx->ctx, NULL, EVENT_FLEXIBLE);
>  	}
>  
> -	perf_event_sched_in(cpuctx, ctx, NULL);
> +	perf_event_sched_in(cpuctx, ctx, NULL, 0);
>  
>  	perf_ctx_sched_task_cb(cpuctx->task_ctx, true);
>  
> @@ -6040,6 +6041,71 @@ void perf_put_mediated_pmu(void)
>  }
>  EXPORT_SYMBOL_GPL(perf_put_mediated_pmu);
>  
> +static inline void perf_host_exit(struct perf_cpu_context *cpuctx)
> +{
> +	perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
> +	ctx_sched_out(&cpuctx->ctx, NULL, EVENT_GUEST);
> +	perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
> +	if (cpuctx->task_ctx) {
> +		perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
> +		task_ctx_sched_out(cpuctx->task_ctx, NULL, EVENT_GUEST);
> +		perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
> +	}
> +}
The CPU context and the task context may have events in the same PMU.
How about this?

	perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
	if (cpuctx->task_ctx)
		perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);

	ctx_sched_out(&cpuctx->ctx, NULL, EVENT_GUEST);
	if (cpuctx->task_ctx)
		task_ctx_sched_out(cpuctx->task_ctx, NULL, EVENT_GUEST);

	if (cpuctx->task_ctx)
		perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
	perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);

Thanks,
Namhyung

> +
> +/* When entering a guest, schedule out all exclude_guest events. */
> +void perf_guest_enter(void)
> +{
> +	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
> +
> +	lockdep_assert_irqs_disabled();
> +
> +	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
> +
> +	if (WARN_ON_ONCE(__this_cpu_read(perf_in_guest)))
> +		goto unlock;
> +
> +	perf_host_exit(cpuctx);
> +
> +	__this_cpu_write(perf_in_guest, true);
> +
> +unlock:
> +	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> +}
> +EXPORT_SYMBOL_GPL(perf_guest_enter);
> +
> +static inline void perf_host_enter(struct perf_cpu_context *cpuctx)
> +{
> +	perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
> +	if (cpuctx->task_ctx)
> +		perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
> +
> +	perf_event_sched_in(cpuctx, cpuctx->task_ctx, NULL, EVENT_GUEST);
> +
> +	if (cpuctx->task_ctx)
> +		perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
> +	perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
> +}
> +
> +void perf_guest_exit(void)
> +{
> +	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
> +
> +	lockdep_assert_irqs_disabled();
> +
> +	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
> +
> +	if (WARN_ON_ONCE(!__this_cpu_read(perf_in_guest)))
> +		goto unlock;
> +
> +	perf_host_enter(cpuctx);
> +
> +	__this_cpu_write(perf_in_guest, false);
> +unlock:
> +	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> +}
> +EXPORT_SYMBOL_GPL(perf_guest_exit);
> +
>  /*
>   * Holding the top-level event's child_mutex means that any
>   * descendant process that has inherited this event will block
> -- 
> 2.49.0.395.g12beb8f557-goog
> 
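
For context, my understanding is that the hypervisor side is expected to
bracket its guest run path with these two calls while IRQs are disabled,
roughly like the sketch below.  This only illustrates the intended calling
convention; the function name is a made-up placeholder and the KVM wiring
is not part of this patch.

	/*
	 * Sketch only: placeholder name, not something added by this series.
	 */
	static void mediated_pmu_guest_run_sketch(void)
	{
		/* Both perf_guest_enter() and perf_guest_exit() assert this. */
		local_irq_disable();

		/*
		 * Hand the PMU to the guest: schedule out all exclude_guest
		 * host events on this CPU.
		 */
		perf_guest_enter();

		/* ... load guest PMU state and enter the guest here ... */

		/*
		 * Give the PMU back to the host: schedule the exclude_guest
		 * events back in.
		 */
		perf_guest_exit();

		local_irq_enable();
	}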