On 5/28/2019 9:30 AM, Peter Zijlstra wrote:
On Tue, May 21, 2019 at 02:40:50PM -0700, kan.li...@linux.intel.com wrote:
+static u64 icl_metric_update_event(struct perf_event *event, u64 val)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       u64 newval, metric, slots_val = 0, new, last;
+       bool nmi = in_nmi();
+       int txn_flags = nmi ? 0 : cpuc->txn_flags;
+
+       /*
+        * Use cached value for transaction.
+        */
+       newval = 0;
+       if (txn_flags) {
+               newval = cpuc->txn_metric;
+               slots_val = cpuc->txn_slots;
+       } else if (nmi) {
+               newval = cpuc->nmi_metric;
+               slots_val = cpuc->nmi_slots;
+       }
+
+       if (!newval) {
+               slots_val = val;
+
+               rdpmcl((1<<29), newval);
+               if (txn_flags) {
+                       cpuc->txn_metric = newval;
+                       cpuc->txn_slots = slots_val;
+               } else if (nmi) {
+                       cpuc->nmi_metric = newval;
+                       cpuc->nmi_slots = slots_val;
+               }
+
+               if (!(txn_flags & PERF_PMU_TXN_REMOVE)) {
+                       /* Reset the metric value when reading
+                        * The SLOTS register must be reset when PERF_METRICS reset,
+                        * otherwise PERF_METRICS may has wrong output.
+                        */

broken comment style.. (and grammar)

I missed a full stop there; the comment should be
"Reset the metric value for each read."
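For reference, the same comment in standard kernel style (the wording is a
sketch based on the above, not the final patch text):

	/*
	 * Reset the metric value for each read.
	 * The SLOTS register must be reset when PERF_METRICS is reset,
	 * otherwise PERF_METRICS may produce wrong output.
	 */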

+                       wrmsrl(MSR_PERF_METRICS, 0);
+                       wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);

I don't get this; overflow happens when we flip sign, so why is
programming 0 a sane thing to do?

Resetting the counters (programming 0) doesn't trigger an overflow.
We have to reset both registers on each read to avoid the known PERF_METRICS issue.
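For clarity, a minimal sketch of that read-then-reset pairing (a
hypothetical helper, not part of the patch; it assumes the rdpmcl()/rdmsrl()/
wrmsrl() accessors and the MSR constants used above):

	/*
	 * Read PERF_METRICS and SLOTS, then clear both as a pair so the
	 * metric fractions always refer to the slots accumulated since
	 * the same point in time.  Writing 0 does not flip the counter
	 * sign bit, so no overflow interrupt is triggered.
	 */
	static u64 icl_read_and_reset_metrics(u64 *slots)
	{
		u64 metrics;

		rdpmcl(1 << 29, metrics);	/* fixed RDPMC encoding for PERF_METRICS */
		rdmsrl(MSR_CORE_PERF_FIXED_CTR3, *slots);
		wrmsrl(MSR_PERF_METRICS, 0);
		wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);

		return metrics;
	}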



+                       hwc->saved_metric = 0;
+                       hwc->saved_slots = 0;
+               } else {
+                       /* saved metric and slots for context switch */
+                       hwc->saved_metric = newval;
+                       hwc->saved_slots = val;
+
+               }
+               /* cache the last metric and slots */
+               cpuc->last_metric = hwc->last_metric;
+               cpuc->last_slots = hwc->last_slots;
+               hwc->last_metric = 0;
+               hwc->last_slots = 0;
+       }
+
+       /* The METRICS and SLOTS have been reset when reading */
+       if (!(txn_flags & PERF_PMU_TXN_REMOVE))
+               local64_set(&hwc->prev_count, 0);
+
+       if (is_slots_event(event))
+               return (slots_val - cpuc->last_slots);
+
+       /*
+        * The metric is reported as an 8bit integer percentage
+        * summing up to 0xff. As the counter is less than 64 bits
+        * we can use the unused bits to get the needed precision.
+        * Use 16bit fixed point arithmetic for
+        * slots-in-metric = (MetricPct / 0xff) * val
+        * This works fine for up to 48bit counters, but will
+        * lose precision above that.
+        */
+
+       metric = (cpuc->last_metric >> ((hwc->idx - INTEL_PMC_IDX_FIXED_METRIC_BASE)*8)) & 0xff;
+       last = (((metric * 0xffff) >> 8) * cpuc->last_slots) >> 16;

How is that cpuc->last_* crap not broken for NMIs ?

There should be no NMI for slots or metric events at the moment, because
MSR_PERF_METRICS and MSR_CORE_PERF_FIXED_CTR3 are reset on the first read.
Other NMIs will not touch this code path.

Thanks,
Kan


+
+       metric = (newval >> ((hwc->idx - INTEL_PMC_IDX_FIXED_METRIC_BASE)*8)) & 0xff;
+       new = (((metric * 0xffff) >> 8) * slots_val) >> 16;
+
+       return (new - last);
+}
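As a worked example of the fixed point math above (illustrative numbers,
not from the patch): with metric byte 0x80 (~50%) and slots_val = 1000000,

	(((0x80 * 0xffff) >> 8) * 1000000) >> 16
	= (32767 * 1000000) >> 16
	= 499984

i.e. roughly half the slots, as expected.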


This is disgusting.. and unreadable.

mul_u64_u32_shr() is actually really fast, use it.
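A minimal sketch of that suggestion (untested, and the helper name is
made up):

	/*
	 * Same 16-bit fixed point scaling as above, folded into
	 * mul_u64_u32_shr() from <linux/math64.h>, which computes
	 * (a * mul) >> shift without overflowing 64 bits.
	 */
	static u64 icl_metric_to_slots(u64 metrics, u64 slots, int idx)
	{
		u32 pct = (metrics >> ((idx - INTEL_PMC_IDX_FIXED_METRIC_BASE) * 8)) & 0xff;

		return mul_u64_u32_shr(slots, (pct * 0xffff) >> 8, 16);
	}

The same helper could then be used for both the last_* and the new
values, keeping the arithmetic in one place.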
