On 2025-05-14 7:19 p.m., Sean Christopherson wrote:
> On Fri, Apr 25, 2025, Peter Zijlstra wrote:
>> On Mon, Mar 24, 2025 at 05:30:45PM +0000, Mingwei Zhang wrote:
>>
>>> @@ -6040,6 +6041,71 @@ void perf_put_mediated_pmu(void)
>>> }
>>> EXPORT_SYMBOL_GPL(perf_put_mediated_pmu);
>>>
>>> +static inline void perf_host_exit(struct perf_cpu_context *cpuctx)
>>> +{
>>> + perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
>>> + ctx_sched_out(&cpuctx->ctx, NULL, EVENT_GUEST);
>>> + perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
>>> + if (cpuctx->task_ctx) {
>>> + perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
>>> + task_ctx_sched_out(cpuctx->task_ctx, NULL, EVENT_GUEST);
>>> + perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
>>> + }
>>> +}
>>> +
>>> +/* When entering a guest, schedule out all exclude_guest events. */
>>> +void perf_guest_enter(void)
>>> +{
>>> + struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
>>> +
>>> + lockdep_assert_irqs_disabled();
>>> +
>>> + perf_ctx_lock(cpuctx, cpuctx->task_ctx);
>>> +
>>> + if (WARN_ON_ONCE(__this_cpu_read(perf_in_guest)))
>>> + goto unlock;
>>> +
>>> + perf_host_exit(cpuctx);
>>> +
>>> + __this_cpu_write(perf_in_guest, true);
>>> +
>>> +unlock:
>>> + perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
>>> +}
>>> +EXPORT_SYMBOL_GPL(perf_guest_enter);
>>> +
>>> +static inline void perf_host_enter(struct perf_cpu_context *cpuctx)
>>> +{
>>> + perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
>>> + if (cpuctx->task_ctx)
>>> + perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
>>> +
>>> + perf_event_sched_in(cpuctx, cpuctx->task_ctx, NULL, EVENT_GUEST);
>>> +
>>> + if (cpuctx->task_ctx)
>>> + perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
>>> + perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
>>> +}
>>> +
>>> +void perf_guest_exit(void)
>>> +{
>>> + struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
>>> +
>>> + lockdep_assert_irqs_disabled();
>>> +
>>> + perf_ctx_lock(cpuctx, cpuctx->task_ctx);
>>> +
>>> + if (WARN_ON_ONCE(!__this_cpu_read(perf_in_guest)))
>>> + goto unlock;
>>> +
>>> + perf_host_enter(cpuctx);
>>> +
>>> + __this_cpu_write(perf_in_guest, false);
>>> +unlock:
>>> + perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
>>> +}
>>> +EXPORT_SYMBOL_GPL(perf_guest_exit);
>>
>> This naming is confusing on purpose? Pick either guest/host and stick
>> with it.
>
> +1. I also think the inner perf_host_{enter,exit}() helpers are superfluous.
> These flows
>
> After a bit of hacking, and with a few spoilers, this is what I ended up with
> (not anywhere near fully tested). I like following KVM's kvm_xxx_{load,put}()
> nomenclature to tie everything together, so I went with "guest" instead of
> "host"
> even though the majority of work being done is to schedule out/in host context.
>
> /* When loading a guest's mediated PMU, schedule out all exclude_guest
> events. */
> void perf_load_guest_context(unsigned long data)
> {
> struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
>
> lockdep_assert_irqs_disabled();
>
> perf_ctx_lock(cpuctx, cpuctx->task_ctx);
>
> if (WARN_ON_ONCE(__this_cpu_read(guest_ctx_loaded)))
> goto unlock;
>
> perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
> ctx_sched_out(&cpuctx->ctx, NULL, EVENT_GUEST);
> perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
> if (cpuctx->task_ctx) {
> perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
> task_ctx_sched_out(cpuctx->task_ctx, NULL, EVENT_GUEST);
> perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
> }
>
> arch_perf_load_guest_context(data);
>
> __this_cpu_write(guest_ctx_loaded, true);
>
> unlock:
> perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> }
> EXPORT_SYMBOL_GPL(perf_load_guest_context);
>
> void perf_put_guest_context(void)
> {
> struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
>
> lockdep_assert_irqs_disabled();
>
> perf_ctx_lock(cpuctx, cpuctx->task_ctx);
>
> if (WARN_ON_ONCE(!__this_cpu_read(guest_ctx_loaded)))
> goto unlock;
>
> arch_perf_put_guest_context();
It will set the guest_ctx_loaded to false.
The update_context_time() invoked in the perf_event_sched_in() will not
get a chance to update the guest time.
I think something like the following should work.
- Disable all in the PMU (disable global control)
- schedule in the host counters (but not run yet since the global
control of the PMU is disabled)
- arch_perf_put_guest_context()
- Enable all in the PMU (Enable global control. The host counters now start)
/*
 * Proposed ordering for putting (unloading) the guest context, following
 * the steps listed in this mail:
 *   1. disable the CPU context and, if present, the task context
 *   2. schedule in the host events via perf_event_sched_in()
 *   3. call arch_perf_put_guest_context()
 *   4. enable the task context (if present), then the CPU context
 *
 * Must be called with IRQs disabled (asserted via lockdep).
 *
 * NOTE(review): this version never writes guest_ctx_loaded itself;
 * presumably arch_perf_put_guest_context() is expected to clear it -
 * confirm against the arch hook.
 */
void perf_put_guest_context(void)
{
struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
lockdep_assert_irqs_disabled();
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
/* Warn once and bail out if no guest context is marked loaded on this CPU. */
if (WARN_ON_ONCE(!__this_cpu_read(guest_ctx_loaded)))
goto unlock;
/* Step 1: disable both contexts before touching any events. */
perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
if (cpuctx->task_ctx)
perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
/* Step 2: schedule the host events in while the contexts are disabled. */
perf_event_sched_in(cpuctx, cpuctx->task_ctx, NULL, EVENT_GUEST);
/* Step 3: arch hook runs after sched-in and before the contexts are re-enabled. */
arch_perf_put_guest_context();
/* Step 4: re-enable in the reverse order of the disables above. */
if (cpuctx->task_ctx)
perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
unlock:
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
The same approach applies to perf_load_guest_context():
- Disable all in the PMU (disable global control)
- schedule out all the host counters
- arch_perf_load_guest_context()
- Enable all in the PMU (enable global control)
/*
 * Proposed ordering for loading the guest context, following the steps
 * listed in this mail:
 *   1. disable the CPU context and, if present, the task context
 *   2. schedule out the host events from each context
 *   3. call arch_perf_load_guest_context(data)
 *   4. enable the CPU context, then the task context (if present)
 *
 * @data: opaque value passed through unchanged to
 *        arch_perf_load_guest_context().
 *
 * Must be called with IRQs disabled (asserted via lockdep).
 *
 * NOTE(review): this version never writes guest_ctx_loaded itself;
 * presumably arch_perf_load_guest_context() is expected to set it -
 * confirm against the arch hook.
 */
void perf_load_guest_context(unsigned long data)
{
struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
lockdep_assert_irqs_disabled();
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
/* Warn once and bail out if a guest context is already marked loaded on this CPU. */
if (WARN_ON_ONCE(__this_cpu_read(guest_ctx_loaded)))
goto unlock;
/* Steps 1-2 for the CPU context: disable it, then schedule its events out. */
perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
ctx_sched_out(&cpuctx->ctx, NULL, EVENT_GUEST);
/* Steps 1-2 for the task context, when one is installed. */
if (cpuctx->task_ctx) {
perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
task_ctx_sched_out(cpuctx->task_ctx, NULL, EVENT_GUEST);
}
/* Step 3: arch hook runs while both contexts are still disabled. */
arch_perf_load_guest_context(data);
/*
 * Step 4: re-enable. NOTE(review): the CPU context is enabled first here,
 * whereas perf_put_guest_context() enables the task context first -
 * confirm whether the ordering matters.
 */
perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
if (cpuctx->task_ctx)
perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
unlock:
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
Thanks,
Kan
>
> perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
> if (cpuctx->task_ctx)
> perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
>
> perf_event_sched_in(cpuctx, cpuctx->task_ctx, NULL, EVENT_GUEST);
>
> if (cpuctx->task_ctx)
> perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
> perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
>
> __this_cpu_write(guest_ctx_loaded, false);
> unlock:
> perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> }
> EXPORT_SYMBOL_GPL(perf_put_guest_context);