On 2025-05-14 7:19 p.m., Sean Christopherson wrote:
> On Fri, Apr 25, 2025, Peter Zijlstra wrote:
>> On Mon, Mar 24, 2025 at 05:30:45PM +0000, Mingwei Zhang wrote:
>>
>>> @@ -6040,6 +6041,71 @@ void perf_put_mediated_pmu(void)
>>>  }
>>>  EXPORT_SYMBOL_GPL(perf_put_mediated_pmu);
>>>  
>>> +static inline void perf_host_exit(struct perf_cpu_context *cpuctx)
>>> +{
>>> +   perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
>>> +   ctx_sched_out(&cpuctx->ctx, NULL, EVENT_GUEST);
>>> +   perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
>>> +   if (cpuctx->task_ctx) {
>>> +           perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
>>> +           task_ctx_sched_out(cpuctx->task_ctx, NULL, EVENT_GUEST);
>>> +           perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
>>> +   }
>>> +}
>>> +
>>> +/* When entering a guest, schedule out all exclude_guest events. */
>>> +void perf_guest_enter(void)
>>> +{
>>> +   struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
>>> +
>>> +   lockdep_assert_irqs_disabled();
>>> +
>>> +   perf_ctx_lock(cpuctx, cpuctx->task_ctx);
>>> +
>>> +   if (WARN_ON_ONCE(__this_cpu_read(perf_in_guest)))
>>> +           goto unlock;
>>> +
>>> +   perf_host_exit(cpuctx);
>>> +
>>> +   __this_cpu_write(perf_in_guest, true);
>>> +
>>> +unlock:
>>> +   perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
>>> +}
>>> +EXPORT_SYMBOL_GPL(perf_guest_enter);
>>> +
>>> +static inline void perf_host_enter(struct perf_cpu_context *cpuctx)
>>> +{
>>> +   perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
>>> +   if (cpuctx->task_ctx)
>>> +           perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
>>> +
>>> +   perf_event_sched_in(cpuctx, cpuctx->task_ctx, NULL, EVENT_GUEST);
>>> +
>>> +   if (cpuctx->task_ctx)
>>> +           perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
>>> +   perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
>>> +}
>>> +
>>> +void perf_guest_exit(void)
>>> +{
>>> +   struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
>>> +
>>> +   lockdep_assert_irqs_disabled();
>>> +
>>> +   perf_ctx_lock(cpuctx, cpuctx->task_ctx);
>>> +
>>> +   if (WARN_ON_ONCE(!__this_cpu_read(perf_in_guest)))
>>> +           goto unlock;
>>> +
>>> +   perf_host_enter(cpuctx);
>>> +
>>> +   __this_cpu_write(perf_in_guest, false);
>>> +unlock:
>>> +   perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
>>> +}
>>> +EXPORT_SYMBOL_GPL(perf_guest_exit);
>>
>> This naming is confusing on purpose? Pick either guest/host and stick
>> with it.
> 
> +1.  I also think the inner perf_host_{enter,exit}() helpers are superfluous.
> These flows
> 
> After a bit of hacking, and with a few spoilers, this is what I ended up with
> (not anywhere near fully tested).  I like following KVM's kvm_xxx_{load,put}()
> nomenclature to tie everything together, so I went with "guest" instead of "host"
> even though the majority of work being done is to schedule out/in host context.
> 
> /* When loading a guest's mediated PMU, schedule out all exclude_guest events. */
> void perf_load_guest_context(unsigned long data)
> {
>       struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
> 
>       lockdep_assert_irqs_disabled();
> 
>       perf_ctx_lock(cpuctx, cpuctx->task_ctx);
> 
>       if (WARN_ON_ONCE(__this_cpu_read(guest_ctx_loaded)))
>               goto unlock;
> 
>       perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
>       ctx_sched_out(&cpuctx->ctx, NULL, EVENT_GUEST);
>       perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
>       if (cpuctx->task_ctx) {
>               perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
>               task_ctx_sched_out(cpuctx->task_ctx, NULL, EVENT_GUEST);
>               perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
>       }
> 
>       arch_perf_load_guest_context(data);
> 
>       __this_cpu_write(guest_ctx_loaded, true);
> 
> unlock:
>       perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> }
> EXPORT_SYMBOL_GPL(perf_load_guest_context);
> 
> void perf_put_guest_context(void)
> {
>       struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
> 
>       lockdep_assert_irqs_disabled();
> 
>       perf_ctx_lock(cpuctx, cpuctx->task_ctx);
> 
>       if (WARN_ON_ONCE(!__this_cpu_read(guest_ctx_loaded)))
>               goto unlock;
> 
>       arch_perf_put_guest_context();

That call will set guest_ctx_loaded to false, so the update_context_time()
invoked from perf_event_sched_in() will not get a chance to update the
guest time.

I think something as below should work.

- Disable all in the PMU (disable global control)
- schedule in the host counters (but not run yet since the global
control of the PMU is disabled)
- arch_perf_put_guest_context()
- Enable all in the PMU (Enable global control. The host counters now start)

/*
 * Put the guest context: schedule the host counters back in while the PMU
 * is disabled, let the arch code put the guest context, and only then
 * re-enable the PMU so the host counters start running.
 *
 * NOTE(review): this function does not write guest_ctx_loaded itself;
 * presumably arch_perf_put_guest_context() clears it -- confirm.
 */
void perf_put_guest_context(void)
{
        struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);

        /* Must be called with IRQs disabled. */
        lockdep_assert_irqs_disabled();

        perf_ctx_lock(cpuctx, cpuctx->task_ctx);

        /* Warn once and bail out if no guest context is currently loaded. */
        if (WARN_ON_ONCE(!__this_cpu_read(guest_ctx_loaded)))
                goto unlock;

        /* Step 1: disable the CPU context and, if present, the task context. */
        perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
        if (cpuctx->task_ctx)
                perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);

        /*
         * Step 2: schedule the host counters in. This happens before
         * arch_perf_put_guest_context() so that it runs while
         * guest_ctx_loaded is still observed as true.
         */
        perf_event_sched_in(cpuctx, cpuctx->task_ctx, NULL, EVENT_GUEST);

        /* Step 3: arch-specific teardown of the guest context. */
        arch_perf_put_guest_context();

        /* Step 4: re-enable in the reverse order of the disables above. */
        if (cpuctx->task_ctx)
                perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
        perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);

unlock:
        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}



The same ordering applies to perf_load_guest_context():

- Disable all in the PMU (disable global control)
- schedule out all the host counters
- arch_perf_load_guest_context()
- Enable all in the PMU (enable global control)

/*
 * Load the guest context: with the PMU disabled, schedule out all host
 * counters, let the arch code load the guest context, then re-enable the
 * PMU.
 *
 * @data: opaque value passed through unchanged to
 *        arch_perf_load_guest_context().
 *
 * NOTE(review): guest_ctx_loaded is not written in this function; confirm
 * it is set elsewhere (e.g. by arch_perf_load_guest_context()).
 */
void perf_load_guest_context(unsigned long data)
{
        struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);

        /* Must be called with IRQs disabled. */
        lockdep_assert_irqs_disabled();

        perf_ctx_lock(cpuctx, cpuctx->task_ctx);

        /* Warn once and bail out if a guest context is already loaded. */
        if (WARN_ON_ONCE(__this_cpu_read(guest_ctx_loaded)))
                goto unlock;

        /*
         * Steps 1-2: disable each context and schedule it out. Both contexts
         * stay disabled until after the arch hook has run.
         */
        perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
        ctx_sched_out(&cpuctx->ctx, NULL, EVENT_GUEST);
        if (cpuctx->task_ctx) {
                perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
                task_ctx_sched_out(cpuctx->task_ctx, NULL, EVENT_GUEST);
        }

        /* Step 3: arch-specific setup of the guest context. */
        arch_perf_load_guest_context(data);

        /* Step 4: re-enable the CPU context, then the task context if any. */
        perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
        if (cpuctx->task_ctx)
                perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
unlock:
        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}


Thanks,
Kan

> 
>       perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
>       if (cpuctx->task_ctx)
>               perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
> 
>       perf_event_sched_in(cpuctx, cpuctx->task_ctx, NULL, EVENT_GUEST);
> 
>       if (cpuctx->task_ctx)
>               perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
>       perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
> 
>       __this_cpu_write(guest_ctx_loaded, false);
> unlock:
>       perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> }
> EXPORT_SYMBOL_GPL(perf_put_guest_context);


Reply via email to