On Tue, May 21, 2019 at 02:40:50PM -0700, kan.li...@linux.intel.com wrote:

> +static u64 icl_metric_update_event(struct perf_event *event, u64 val)
> +{
> +	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> +	struct hw_perf_event *hwc = &event->hw;
> +	u64 newval, metric, slots_val = 0, new, last;
> +	bool nmi = in_nmi();
> +	int txn_flags = nmi ? 0 : cpuc->txn_flags;
> +
> +	/*
> +	 * Use cached value for transaction.
> +	 */
> +	newval = 0;
> +	if (txn_flags) {
> +		newval = cpuc->txn_metric;
> +		slots_val = cpuc->txn_slots;
> +	} else if (nmi) {
> +		newval = cpuc->nmi_metric;
> +		slots_val = cpuc->nmi_slots;
> +	}
> +
> +	if (!newval) {
> +		slots_val = val;
> +
> +		rdpmcl((1<<29), newval);
> +		if (txn_flags) {
> +			cpuc->txn_metric = newval;
> +			cpuc->txn_slots = slots_val;
> +		} else if (nmi) {
> +			cpuc->nmi_metric = newval;
> +			cpuc->nmi_slots = slots_val;
> +		}
> +
> +		if (!(txn_flags & PERF_PMU_TXN_REMOVE)) {
> +			/* Reset the metric value when reading
> +			 * The SLOTS register must be reset when PERF_METRICS reset,
> +			 * otherwise PERF_METRICS may has wrong output.
> +			 */
broken comment style.. (and grammar)

> +			wrmsrl(MSR_PERF_METRICS, 0);
> +			wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);

I don't get this, overflow happens when we flip sign, so why is
programming 0 a sane thing to do?

> +			hwc->saved_metric = 0;
> +			hwc->saved_slots = 0;
> +		} else {
> +			/* saved metric and slots for context switch */
> +			hwc->saved_metric = newval;
> +			hwc->saved_slots = val;
> +
> +		}
> +		/* cache the last metric and slots */
> +		cpuc->last_metric = hwc->last_metric;
> +		cpuc->last_slots = hwc->last_slots;
> +		hwc->last_metric = 0;
> +		hwc->last_slots = 0;
> +	}
> +
> +	/* The METRICS and SLOTS have been reset when reading */
> +	if (!(txn_flags & PERF_PMU_TXN_REMOVE))
> +		local64_set(&hwc->prev_count, 0);
> +
> +	if (is_slots_event(event))
> +		return (slots_val - cpuc->last_slots);
> +
> +	/*
> +	 * The metric is reported as an 8bit integer percentage
> +	 * suming up to 0xff. As the counter is less than 64bits
> +	 * we can use the not used bits to get the needed precision.
> +	 * Use 16bit fixed point arithmetic for
> +	 * slots-in-metric = (MetricPct / 0xff) * val
> +	 * This works fine for upto 48bit counters, but will
> +	 * lose precision above that.
> +	 */
> +
> +	metric = (cpuc->last_metric >> ((hwc->idx - INTEL_PMC_IDX_FIXED_METRIC_BASE)*8)) & 0xff;
> +	last = (((metric * 0xffff) >> 8) * cpuc->last_slots) >> 16;

How is that cpuc->last_* crap not broken for NMIs?

> +
> +	metric = (newval >> ((hwc->idx - INTEL_PMC_IDX_FIXED_METRIC_BASE)*8)) & 0xff;
> +	new = (((metric * 0xffff) >> 8) * slots_val) >> 16;
> +
> +	return (new - last);
> +}

This is disgusting.. and unreadable.

mul_u64_u32_shr() is actually really fast, use it.
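For reference, a minimal sketch of the kind of helper being suggested,
assuming the PERF_METRICS layout described in the quoted comment (one
8-bit fraction per metric, all fractions summing to 0xff). The function
name and the 24-bit fixed-point scaling are illustrative, not from the
patch; @idx stands for (hwc->idx - INTEL_PMC_IDX_FIXED_METRIC_BASE):

#include <linux/math64.h>

/*
 * Illustrative only: slots-in-metric = slots * frac / 0xff, with the
 * divide folded into a fixed-point multiply so mul_u64_u32_shr() can
 * do the wide math.
 */
static u64 icl_metric_to_slots(u64 metrics, u64 slots, int idx)
{
	u32 frac, scaled;

	/* each metric is an 8-bit fraction of 0xff in PERF_METRICS */
	frac = (metrics >> (idx * 8)) & 0xff;

	/* scale to 24-bit fixed point: frac == 0xff maps to 1 << 24 */
	scaled = (frac << 24) / 0xff;

	return mul_u64_u32_shr(slots, scaled, 24);
}

Since mul_u64_u32_shr() keeps the full intermediate product, this also
avoids the precision loss above 48-bit counters that the quoted comment
warns about.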