On Fri, Sep 01, 2017 at 10:48:21PM -0700, Yonghong Song wrote:
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index b14095b..5a50808 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -898,7 +898,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
>                               void *context);
>  extern void perf_pmu_migrate_context(struct pmu *pmu,
>                               int src_cpu, int dst_cpu);
> -int perf_event_read_local(struct perf_event *event, u64 *value);
> +int perf_event_read_local(struct perf_event *event, u64 *value,
> +                       u64 *enabled, u64 *running);
>  extern u64 perf_event_read_value(struct perf_event *event,
>                                u64 *enabled, u64 *running);
>  

> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 8c01572..20c4039 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -3670,7 +3670,8 @@ static inline u64 perf_event_count(struct perf_event *event)
>   *     will not be local and we cannot read them atomically
>   *   - must not have a pmu::count method
>   */
> -int perf_event_read_local(struct perf_event *event, u64 *value)
> +int perf_event_read_local(struct perf_event *event, u64 *value,
> +                       u64 *enabled, u64 *running)
>  {
>       unsigned long flags;
>       int ret = 0;
> @@ -3694,7 +3695,7 @@ int perf_event_read_local(struct perf_event *event, u64 *value)
>        * It must not have a pmu::count method, those are not
>        * NMI safe.
>        */
> -     if (event->pmu->count) {
> +     if (value && event->pmu->count) {
>               ret = -EOPNOTSUPP;
>               goto out;
>       }

No, value _must_ be non-NULL. Otherwise you allow reading the timestamps
independently of the count value, and that is broken.

The {value, enabled, running} tuple is temporally related.

> @@ -3718,10 +3719,16 @@ int perf_event_read_local(struct perf_event *event, u64 *value)
>        * or local to this CPU. Furthermore it means its ACTIVE (otherwise
>        * oncpu == -1).
>        */
> -     if (event->oncpu == smp_processor_id())
> -             event->pmu->read(event);
> -
> -     *value = local64_read(&event->count);
> +     if (value) {
> +             if (event->oncpu == smp_processor_id())
> +                     event->pmu->read(event);
> +             *value = local64_read(&event->count);
> +     }
> +     if (enabled && running) {
> +             u64 ctx_time = event->shadow_ctx_time + perf_clock();
> +             *enabled = ctx_time - event->tstamp_enabled;
> +             *running = ctx_time - event->tstamp_running;
> +     }
>  out:
>       local_irq_restore(flags);

Please make that something like:


        u64 now = event->shadow_ctx_time + perf_clock();

        if (enabled)
                *enabled = now - event->tstamp_enabled;

        if (event->oncpu == smp_processor_id()) {
                event->pmu->read(event);
                if (running)
                        *running = now - event->tstamp_running;
        } else {
                *running = event->total_time_running;
        }

And I'll fix it up when I make:

  
https://lkml.kernel.org/r/20170831171837.njnc6r6elsvkl...@hirez.programming.kicks-ass.net

happen.

Reply via email to