On Wed, Aug 13, 2025 at 06:01:10PM +0100, Robin Murphy wrote:
> Only a handful of CPU PMUs accept PERF_TYPE_{RAW,HARDWARE,HW_CACHE}
> events without registering themselves as PERF_TYPE_RAW in the first
> place. Add an explicit opt-in for these special cases, so that we can
> make life easier for every other driver (and probably also speed up the
> slow-path search) by having perf_try_init_event() do the basic type
> checking to cover the majority of cases.
> 
> Signed-off-by: Robin Murphy <robin.mur...@arm.com>


To bikeshed a little here, I'm not keen on the PERF_PMU_CAP_RAW_EVENTS
name, because it's not clear what "RAW" really means, and people will
definitely read that to mean something else.

Could we go with something like PERF_PMU_CAP_COMMON_CPU_EVENTS, to make
it clear that this is about opting into CPU-PMU specific event types (of
which PERF_TYPE_RAW is one of)?

Likewise, s/is_raw_pmu()/pmu_supports_common_cpu_events()/.

> ---
> 
> A further possibility is to automatically add the cap to PERF_TYPE_RAW
> PMUs in perf_pmu_register() to have a single point-of-use condition; I'm
> undecided...

I reckon we don't need to automagically do that, but I reckon that
is_raw_pmu()/pmu_supports_common_cpu_events() should only check the cap,
and we don't read anything special into any of
PERF_TYPE_{RAW,HARDWARE,HW_CACHE}.

> ---
>  arch/s390/kernel/perf_cpum_cf.c    |  1 +
>  arch/s390/kernel/perf_pai_crypto.c |  2 +-
>  arch/s390/kernel/perf_pai_ext.c    |  2 +-
>  arch/x86/events/core.c             |  2 +-
>  drivers/perf/arm_pmu.c             |  1 +
>  include/linux/perf_event.h         |  1 +
>  kernel/events/core.c               | 15 +++++++++++++++
>  7 files changed, 21 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
> index 1a94e0944bc5..782ab755ddd4 100644
> --- a/arch/s390/kernel/perf_cpum_cf.c
> +++ b/arch/s390/kernel/perf_cpum_cf.c
> @@ -1054,6 +1054,7 @@ static void cpumf_pmu_del(struct perf_event *event, int 
> flags)
>  /* Performance monitoring unit for s390x */
>  static struct pmu cpumf_pmu = {
>       .task_ctx_nr  = perf_sw_context,
> +     .capabilities = PERF_PMU_CAP_RAW_EVENTS,
>       .pmu_enable   = cpumf_pmu_enable,
>       .pmu_disable  = cpumf_pmu_disable,
>       .event_init   = cpumf_pmu_event_init,

Tangential, but use of perf_sw_context here looks bogus.

> diff --git a/arch/s390/kernel/perf_pai_crypto.c 
> b/arch/s390/kernel/perf_pai_crypto.c
> index a64b6b056a21..b5b6d8b5d943 100644
> --- a/arch/s390/kernel/perf_pai_crypto.c
> +++ b/arch/s390/kernel/perf_pai_crypto.c
> @@ -569,7 +569,7 @@ static const struct attribute_group 
> *paicrypt_attr_groups[] = {
>  /* Performance monitoring unit for mapped counters */
>  static struct pmu paicrypt = {
>       .task_ctx_nr  = perf_hw_context,
> -     .capabilities = PERF_PMU_CAP_SAMPLING,
> +     .capabilities = PERF_PMU_CAP_SAMPLING | PERF_PMU_CAP_RAW_EVENTS,
>       .event_init   = paicrypt_event_init,
>       .add          = paicrypt_add,
>       .del          = paicrypt_del,
> diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
> index 1261f80c6d52..bcd28c38da70 100644
> --- a/arch/s390/kernel/perf_pai_ext.c
> +++ b/arch/s390/kernel/perf_pai_ext.c
> @@ -595,7 +595,7 @@ static const struct attribute_group *paiext_attr_groups[] 
> = {
>  /* Performance monitoring unit for mapped counters */
>  static struct pmu paiext = {
>       .task_ctx_nr  = perf_hw_context,
> -     .capabilities = PERF_PMU_CAP_SAMPLING,
> +     .capabilities = PERF_PMU_CAP_SAMPLING | PERF_PMU_CAP_RAW_EVENTS,
>       .event_init   = paiext_event_init,
>       .add          = paiext_add,
>       .del          = paiext_del,
> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> index 789dfca2fa67..764728bb80ae 100644
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -2697,7 +2697,7 @@ static bool x86_pmu_filter(struct pmu *pmu, int cpu)
>  }
>  
>  static struct pmu pmu = {
> -     .capabilities           = PERF_PMU_CAP_SAMPLING,
> +     .capabilities           = PERF_PMU_CAP_SAMPLING | 
> PERF_PMU_CAP_RAW_EVENTS,
>  
>       .pmu_enable             = x86_pmu_enable,
>       .pmu_disable            = x86_pmu_disable,
> diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
> index 72d8f38d0aa5..bc772a3bf411 100644
> --- a/drivers/perf/arm_pmu.c
> +++ b/drivers/perf/arm_pmu.c
> @@ -877,6 +877,7 @@ struct arm_pmu *armpmu_alloc(void)
>                * specific PMU.
>                */
>               .capabilities   = PERF_PMU_CAP_SAMPLING |
> +                               PERF_PMU_CAP_RAW_EVENTS |
>                                 PERF_PMU_CAP_EXTENDED_REGS |
>                                 PERF_PMU_CAP_EXTENDED_HW_TYPE,
>       };
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 183b7c48b329..c6ad036c0037 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -305,6 +305,7 @@ struct perf_event_pmu_context;
>  #define PERF_PMU_CAP_EXTENDED_HW_TYPE        0x0100
>  #define PERF_PMU_CAP_AUX_PAUSE               0x0200
>  #define PERF_PMU_CAP_AUX_PREFER_LARGE        0x0400
> +#define PERF_PMU_CAP_RAW_EVENTS              0x0800
>  
>  /**
>   * pmu::scope
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 71b2a6730705..2ecee76d2ae2 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -12556,11 +12556,26 @@ static inline bool has_extended_regs(struct 
> perf_event *event)
>              (event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK);
>  }
>  
> +static bool is_raw_pmu(const struct pmu *pmu)
> +{
> +     return pmu->type == PERF_TYPE_RAW ||
> +            pmu->capabilities & PERF_PMU_CAP_RAW_EVENTS;
> +}

As above, I reckon we should make this:

static bool pmu_supports_common_cpu_events(const struct pmu *pmu)
{
        return pmu->capabilities & PERF_PMU_CAP_RAW_EVENTS;
}

Other than the above, this looks good to me.

Mark.

> +
>  static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
>  {
>       struct perf_event_context *ctx = NULL;
>       int ret;
>  
> +     /*
> +      * Before touching anything, we can safely skip:
> +      * - any event for a specific PMU which is not this one
> +      * - any common event if this PMU doesn't support them
> +      */
> +     if (event->attr.type != pmu->type &&
> +         (event->attr.type >= PERF_TYPE_MAX || is_raw_pmu(pmu)))
> +             return -ENOENT;
> +
>       if (!try_module_get(pmu->module))
>               return -ENODEV;
>  
> -- 
> 2.39.2.101.g768bb238c484.dirty
> 

Reply via email to