On Tue, May 21, 2019 at 02:40:50PM -0700, kan.li...@linux.intel.com wrote:

> +static u64 icl_metric_update_event(struct perf_event *event, u64 val)
> +{
> +	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> +	struct hw_perf_event *hwc = &event->hw;
> +	u64 newval, metric, slots_val = 0, new, last;
> +	bool nmi = in_nmi();
> +	int txn_flags = nmi ? 0 : cpuc->txn_flags;
> +
> +	/*
> +	 * Use cached value for transaction.
> +	 */
> +	newval = 0;
> +	if (txn_flags) {
> +		newval = cpuc->txn_metric;
> +		slots_val = cpuc->txn_slots;
> +	} else if (nmi) {
> +		newval = cpuc->nmi_metric;
> +		slots_val = cpuc->nmi_slots;
> +	}
> +
> +	if (!newval) {
> +		slots_val = val;
> +
> +		rdpmcl((1<<29), newval);
> +		if (txn_flags) {
> +			cpuc->txn_metric = newval;
> +			cpuc->txn_slots = slots_val;
> +		} else if (nmi) {
> +			cpuc->nmi_metric = newval;
> +			cpuc->nmi_slots = slots_val;
> +		}
> +
> +		if (!(txn_flags & PERF_PMU_TXN_REMOVE)) {
> +			/* Reset the metric value when reading
> +			 * The SLOTS register must be reset when PERF_METRICS reset,
> +			 * otherwise PERF_METRICS may has wrong output.
> +			 */
broken comment style.. (and grammar)

> +			wrmsrl(MSR_PERF_METRICS, 0);
> +			wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);

I don't get this, overflow happens when we flip sign, so why is
programming 0 a sane thing to do?

> +			hwc->saved_metric = 0;
> +			hwc->saved_slots = 0;
> +		} else {
> +			/* saved metric and slots for context switch */
> +			hwc->saved_metric = newval;
> +			hwc->saved_slots = val;
> +
> +		}
> +		/* cache the last metric and slots */
> +		cpuc->last_metric = hwc->last_metric;
> +		cpuc->last_slots = hwc->last_slots;
> +		hwc->last_metric = 0;
> +		hwc->last_slots = 0;
> +	}
> +
> +	/* The METRICS and SLOTS have been reset when reading */
> +	if (!(txn_flags & PERF_PMU_TXN_REMOVE))
> +		local64_set(&hwc->prev_count, 0);
> +
> +	if (is_slots_event(event))
> +		return (slots_val - cpuc->last_slots);
> +
> +	/*
> +	 * The metric is reported as an 8bit integer percentage
> +	 * suming up to 0xff. As the counter is less than 64bits
> +	 * we can use the not used bits to get the needed precision.
> +	 * Use 16bit fixed point arithmetic for
> +	 * slots-in-metric = (MetricPct / 0xff) * val
> +	 * This works fine for upto 48bit counters, but will
> +	 * lose precision above that.
> +	 */
> +
> +	metric = (cpuc->last_metric >> ((hwc->idx - INTEL_PMC_IDX_FIXED_METRIC_BASE)*8)) & 0xff;
> +	last = (((metric * 0xffff) >> 8) * cpuc->last_slots) >> 16;

How is that cpuc->last_* crap not broken for NMIs?

> +
> +	metric = (newval >> ((hwc->idx - INTEL_PMC_IDX_FIXED_METRIC_BASE)*8)) & 0xff;
> +	new = (((metric * 0xffff) >> 8) * slots_val) >> 16;
> +
> +	return (new - last);
> +}

This is disgusting.. and unreadable.

mul_u64_u32_shr() is actually really fast, use it.
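For reference, a minimal sketch of the kind of helper being suggested,
assuming the PERF_METRICS layout described in the quoted comment (one
8-bit fraction per metric, all fractions summing to 0xff). The function
name and the 24-bit fixed-point scaling are illustrative, not from the
patch; @idx stands for (hwc->idx - INTEL_PMC_IDX_FIXED_METRIC_BASE):

#include <linux/math64.h>

/*
 * Illustrative only: slots-in-metric = slots * frac / 0xff, with the
 * divide folded into a fixed-point multiply so mul_u64_u32_shr() can
 * do the wide math.
 */
static u64 icl_metric_to_slots(u64 metrics, u64 slots, int idx)
{
	u32 frac, scaled;

	/* each metric is an 8-bit fraction of 0xff in PERF_METRICS */
	frac = (metrics >> (idx * 8)) & 0xff;

	/* scale to 24-bit fixed point: frac == 0xff maps to 1 << 24 */
	scaled = (frac << 24) / 0xff;

	return mul_u64_u32_shr(slots, scaled, 24);
}

Since mul_u64_u32_shr() keeps the full intermediate product, this also
avoids the precision loss above 48-bit counters that the quoted comment
warns about.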