On Mon, Jul 27, 2015 at 04:46:16AM -0400, kan.li...@intel.com wrote:

As a general comment; this thing is unreadable. Far too much macro foo
to instantiate the different PMUs.

> +struct perf_power_cstate_event_msr {
> +     int     id;
> +     u64     msr;
> +};
> +
> +enum perf_power_cstate_id {
> +     /*
> +      * power_cstate events, generalized by the kernel:
> +      */
> +     PERF_POWER_CORE_C1_RES = 0,
> +     PERF_POWER_CORE_C3_RES,
> +     PERF_POWER_CORE_C6_RES,
> +     PERF_POWER_CORE_C7_RES,

These are two different PMUs, why are they in the same enum space?

> +     PERF_POWER_PKG_C2_RES,
> +     PERF_POWER_PKG_C3_RES,
> +     PERF_POWER_PKG_C6_RES,
> +     PERF_POWER_PKG_C7_RES,
> +     PERF_POWER_PKG_C8_RES,
> +     PERF_POWER_PKG_C9_RES,
> +     PERF_POWER_PKG_C10_RES,
> +
> +     PERF_POWER_CSTATE_EVENT_MAX,       /* non-ABI */
> +};
> +
> +struct power_cstate_pmu {
> +     struct intel_power_cstate_type  *power_cstate_type;
> +     struct pmu                      *pmu;
> +};
> +
> +
> +static struct intel_power_cstate_type *empty_power_cstate[] = { NULL, };
> +struct intel_power_cstate_type **power_cstate = empty_power_cstate;
> +
> +static struct perf_power_cstate_event_msr power_cstate_events[] = {
> +     { PERF_POWER_CORE_C1_RES, MSR_CORE_C1_RES },
> +     { PERF_POWER_CORE_C3_RES, MSR_CORE_C3_RESIDENCY },
> +     { PERF_POWER_CORE_C6_RES, MSR_CORE_C6_RESIDENCY },
> +     { PERF_POWER_CORE_C7_RES, MSR_CORE_C7_RESIDENCY },
> +     { PERF_POWER_PKG_C2_RES, MSR_PKG_C2_RESIDENCY },
> +     { PERF_POWER_PKG_C3_RES, MSR_PKG_C3_RESIDENCY },
> +     { PERF_POWER_PKG_C6_RES, MSR_PKG_C6_RESIDENCY },
> +     { PERF_POWER_PKG_C7_RES, MSR_PKG_C7_RESIDENCY },
> +     { PERF_POWER_PKG_C8_RES, MSR_PKG_C8_RESIDENCY },
> +     { PERF_POWER_PKG_C9_RES, MSR_PKG_C9_RESIDENCY },
> +     { PERF_POWER_PKG_C10_RES, MSR_PKG_C10_RESIDENCY },
> +};

What's the point of the first entry? You already know which index it is,
no point in storing that again.

> +
> +EVENT_ATTR_STR(c1-residency, power_core_c1_res, "event=0x00");
> +EVENT_ATTR_STR(c3-residency, power_core_c3_res, "event=0x01");
> +EVENT_ATTR_STR(c6-residency, power_core_c6_res, "event=0x02");
> +EVENT_ATTR_STR(c7-residency, power_core_c7_res, "event=0x03");

> +EVENT_ATTR_STR(c2-residency, power_pkg_c2_res, "event=0x04");

I would very much expect this thing to start counting at 0 again.

> +EVENT_ATTR_STR(c3-residency, power_pkg_c3_res, "event=0x05");
> +EVENT_ATTR_STR(c6-residency, power_pkg_c6_res, "event=0x06");
> +EVENT_ATTR_STR(c7-residency, power_pkg_c7_res, "event=0x07");
> +EVENT_ATTR_STR(c8-residency, power_pkg_c8_res, "event=0x08");
> +EVENT_ATTR_STR(c9-residency, power_pkg_c9_res, "event=0x09");
> +EVENT_ATTR_STR(c10-residency, power_pkg_c10_res, "event=0x0a");
> +
> +static cpumask_t power_cstate_core_cpu_mask;

That one typically does not need a cpumask.

> +static cpumask_t power_cstate_pkg_cpu_mask;

> +static int power_cstate_pmu_event_init(struct perf_event *event)
> +{
> +     u64 cfg = event->attr.config;
> +     int ret = 0;
> +
> +     if (event->attr.type != event->pmu->type)
> +             return -ENOENT;
> +
> +     /*
> +      * check event is known (determines counter)
> +      */
> +     if (cfg >= PERF_POWER_CSTATE_EVENT_MAX)
> +             return -EINVAL;
> +
> +     /* unsupported modes and filters */
> +     if (event->attr.exclude_user   ||
> +         event->attr.exclude_kernel ||
> +         event->attr.exclude_hv     ||
> +         event->attr.exclude_idle   ||
> +         event->attr.exclude_host   ||
> +         event->attr.exclude_guest  ||
> +         event->attr.sample_period) /* no sampling */
> +             return -EINVAL;

Same as with the MSR thing, but worse. What happens if we hand a config
value for the 'wrong' PMU type?

What happens if we hand a pkg c10 config to one that doesn't have c10?

> +     /* must be done before validate_group */
> +     event->hw.event_base = power_cstate_events[cfg].msr;
> +     event->hw.config = cfg;
> +     event->hw.idx = -1;
> +
> +     return ret;
> +}
> +
> +static inline u64 power_cstate_pmu_read_counter(struct perf_event *event)
> +{
> +     u64 val;
> +
> +     rdmsrl_safe(event->hw.event_base, &val);

No, at this point you had better know if that MSR exists or not. If it
does not the event should not exist.

> +     return val;
> +}

 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to