On Fri, Sep 01, 2017 at 10:48:21PM -0700, Yonghong Song wrote:
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index b14095b..5a50808 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -898,7 +898,8 @@ perf_event_create_kernel_counter(struct perf_event_attr
> *attr,
>                                  void *context);
>  extern void perf_pmu_migrate_context(struct pmu *pmu,
>                                  int src_cpu, int dst_cpu);
> -int perf_event_read_local(struct perf_event *event, u64 *value);
> +int perf_event_read_local(struct perf_event *event, u64 *value,
> +                          u64 *enabled, u64 *running);
>  extern u64 perf_event_read_value(struct perf_event *event,
>                                   u64 *enabled, u64 *running);
>
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 8c01572..20c4039 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -3670,7 +3670,8 @@ static inline u64 perf_event_count(struct perf_event
> *event)
>   *     will not be local and we cannot read them atomically
>   *   - must not have a pmu::count method
>   */
> -int perf_event_read_local(struct perf_event *event, u64 *value)
> +int perf_event_read_local(struct perf_event *event, u64 *value,
> +                          u64 *enabled, u64 *running)
>  {
>          unsigned long flags;
>          int ret = 0;
> @@ -3694,7 +3695,7 @@ int perf_event_read_local(struct perf_event *event, u64
> *value)
>           * It must not have a pmu::count method, those are not
>           * NMI safe.
>           */
> -        if (event->pmu->count) {
> +        if (value && event->pmu->count) {
>                  ret = -EOPNOTSUPP;
>                  goto out;
>          }

No, value _must_ be !NULL. Otherwise you allow getting timestamps
independently from the count value and that is broken.

The {value, enabled, running} tuple is temporally related.

> @@ -3718,10 +3719,16 @@ int perf_event_read_local(struct perf_event *event,
> u64 *value)
>           * or local to this CPU. Furthermore it means its ACTIVE (otherwise
>           * oncpu == -1).
>           */
> -        if (event->oncpu == smp_processor_id())
> -                event->pmu->read(event);
> -
> -        *value = local64_read(&event->count);
> +        if (value) {
> +                if (event->oncpu == smp_processor_id())
> +                        event->pmu->read(event);
> +                *value = local64_read(&event->count);
> +        }
> +        if (enabled && running) {
> +                u64 ctx_time = event->shadow_ctx_time + perf_clock();
> +                *enabled = ctx_time - event->tstamp_enabled;
> +                *running = ctx_time - event->tstamp_running;
> +        }
>  out:
>          local_irq_restore(flags);

Please make that something like:

        u64 now = event->shadow_ctx_time + perf_clock();

        if (enabled)
                *enabled = now - event->tstamp_enabled;

        if (event->oncpu == smp_processor_id()) {
                event->pmu->read(event);
                if (running)
                        *running = now - event->tstamp_running;
        } else {
                *running = event->total_time_running;
        }

And I'll fix it up when I make:

  https://lkml.kernel.org/r/20170831171837.njnc6r6elsvkl...@hirez.programming.kicks-ass.net

happen.