Sampling is inherently a feature for CPU PMUs, given that the thing
to be sampled is a CPU context. These days, we have many more
uncore/system PMUs than CPU PMUs, so it no longer makes much sense to
assume sampling support by default and force the ever-growing majority
of drivers to opt out of it (or erroneously fail to do so). Instead, let's
introduce a positive opt-in capability that's more obvious and easier to
maintain.
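
For illustration (not part of this patch; the "example_pmu" names are
hypothetical), a sampling-capable driver now opts in explicitly when
registering its PMU, while a counting-only uncore driver simply leaves
the flag clear and any sampling event opened on it gets -EOPNOTSUPP:

  /* Sampling-capable PMU: advertise the new opt-in capability. */
  static struct pmu example_pmu = {
          .capabilities   = PERF_PMU_CAP_SAMPLING,
          .event_init     = example_pmu_event_init,
          .add            = example_pmu_add,
          .del            = example_pmu_del,
          .start          = example_pmu_start,
          .stop           = example_pmu_stop,
          .read           = example_pmu_read,
  };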

Signed-off-by: Robin Murphy <robin.mur...@arm.com>
---
 arch/alpha/kernel/perf_event.c       |  3 ++-
 arch/arc/kernel/perf_event.c         |  2 ++
 arch/csky/kernel/perf_event.c        |  2 ++
 arch/loongarch/kernel/perf_event.c   |  1 +
 arch/mips/kernel/perf_event_mipsxx.c |  1 +
 arch/powerpc/perf/core-book3s.c      |  1 +
 arch/powerpc/perf/core-fsl-emb.c     |  1 +
 arch/powerpc/perf/imc-pmu.c          |  1 +
 arch/s390/kernel/perf_cpum_cf.c      |  1 +
 arch/s390/kernel/perf_cpum_sf.c      |  2 ++
 arch/s390/kernel/perf_pai_crypto.c   |  1 +
 arch/s390/kernel/perf_pai_ext.c      |  1 +
 arch/sparc/kernel/perf_event.c       |  1 +
 arch/x86/events/amd/ibs.c            |  2 ++
 arch/x86/events/core.c               |  4 +++-
 arch/xtensa/kernel/perf_event.c      |  1 +
 drivers/perf/arm_pmu.c               |  3 ++-
 drivers/perf/arm_pmu_platform.c      |  1 +
 drivers/perf/arm_spe_pmu.c           |  3 ++-
 drivers/perf/riscv_pmu_sbi.c         |  2 ++
 include/linux/perf_event.h           |  3 ++-
 kernel/events/core.c                 | 20 +++++++++++---------
 kernel/events/hw_breakpoint.c        |  1 +
 23 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 8557165e64c0..4de1802d249f 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -761,7 +761,8 @@ static struct pmu pmu = {
        .start          = alpha_pmu_start,
        .stop           = alpha_pmu_stop,
        .read           = alpha_pmu_read,
-       .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
+       .capabilities   = PERF_PMU_CAP_SAMPLING |
+                         PERF_PMU_CAP_NO_EXCLUDE,
 };
 
 
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index ed6d4f0cd621..1b99b0215027 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -818,6 +818,8 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
 
        if (irq == -1)
                arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+       else
+               arc_pmu->pmu.capabilities |= PERF_PMU_CAP_SAMPLING;
 
        /*
         * perf parser doesn't really like '-' symbol in events name, so let's
diff --git a/arch/csky/kernel/perf_event.c b/arch/csky/kernel/perf_event.c
index e0a36acd265b..c5ba6e235a6f 100644
--- a/arch/csky/kernel/perf_event.c
+++ b/arch/csky/kernel/perf_event.c
@@ -1204,6 +1204,7 @@ int init_hw_perf_events(void)
        }
 
        csky_pmu.pmu = (struct pmu) {
+               .capabilities   = PERF_PMU_CAP_SAMPLING,
                .pmu_enable     = csky_pmu_enable,
                .pmu_disable    = csky_pmu_disable,
                .event_init     = csky_pmu_event_init,
@@ -1314,6 +1315,7 @@ int csky_pmu_device_probe(struct platform_device *pdev,
 
        ret = csky_pmu_request_irq(csky_pmu_handle_irq);
        if (ret) {
+               csky_pmu.pmu.capabilities &= ~PERF_PMU_CAP_SAMPLING;
                csky_pmu.pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
                pr_notice("[perf] PMU request irq fail!\n");
        }
diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
index 8ad098703488..341b17bedd0e 100644
--- a/arch/loongarch/kernel/perf_event.c
+++ b/arch/loongarch/kernel/perf_event.c
@@ -571,6 +571,7 @@ static int loongarch_pmu_event_init(struct perf_event *event)
 }
 
 static struct pmu pmu = {
+       .capabilities   = PERF_PMU_CAP_SAMPLING,
        .pmu_enable     = loongarch_pmu_enable,
        .pmu_disable    = loongarch_pmu_disable,
        .event_init     = loongarch_pmu_event_init,
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 196a070349b0..4c5d64d1158e 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -687,6 +687,7 @@ static int mipspmu_event_init(struct perf_event *event)
 }
 
 static struct pmu pmu = {
+       .capabilities   = PERF_PMU_CAP_SAMPLING,
        .pmu_enable     = mipspmu_enable,
        .pmu_disable    = mipspmu_disable,
        .event_init     = mipspmu_event_init,
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index d67f7d511f13..cfe7d3c120e1 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2207,6 +2207,7 @@ ssize_t power_events_sysfs_show(struct device *dev,
 }
 
 static struct pmu power_pmu = {
+       .capabilities   = PERF_PMU_CAP_SAMPLING,
        .pmu_enable     = power_pmu_enable,
        .pmu_disable    = power_pmu_disable,
        .event_init     = power_pmu_event_init,
diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
index 509932b91b75..62038ff3663f 100644
--- a/arch/powerpc/perf/core-fsl-emb.c
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -570,6 +570,7 @@ static int fsl_emb_pmu_event_init(struct perf_event *event)
 }
 
 static struct pmu fsl_emb_pmu = {
+       .capabilities   = PERF_PMU_CAP_SAMPLING,
        .pmu_enable     = fsl_emb_pmu_enable,
        .pmu_disable    = fsl_emb_pmu_disable,
        .event_init     = fsl_emb_pmu_event_init,
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 8664a7d297ad..f352dda3baf9 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1507,6 +1507,7 @@ static int update_pmu_ops(struct imc_pmu *pmu)
                pmu->pmu.commit_txn = thread_imc_pmu_commit_txn;
                break;
        case IMC_DOMAIN_TRACE:
+               pmu->pmu.capabilities |= PERF_PMU_CAP_SAMPLING;
                pmu->pmu.event_init = trace_imc_event_init;
                pmu->pmu.add = trace_imc_event_add;
                pmu->pmu.del = trace_imc_event_del;
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 4d09954ebf49..7d10842d54f0 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -1861,6 +1861,7 @@ static const struct attribute_group *cfdiag_attr_groups[] = {
  */
 static struct pmu cf_diag = {
        .task_ctx_nr  = perf_sw_context,
+       .capabilities = PERF_PMU_CAP_SAMPLING,
        .event_init   = cfdiag_event_init,
        .pmu_enable   = cpumf_pmu_enable,
        .pmu_disable  = cpumf_pmu_disable,
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index f432869f8921..3d2c400f0aaa 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1892,6 +1892,8 @@ static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
 };
 
 static struct pmu cpumf_sampling = {
+       .capabilities = PERF_PMU_CAP_SAMPLING,
+
        .pmu_enable   = cpumsf_pmu_enable,
        .pmu_disable  = cpumsf_pmu_disable,
 
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index f373a1009c45..a64b6b056a21 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -569,6 +569,7 @@ static const struct attribute_group *paicrypt_attr_groups[] = {
 /* Performance monitoring unit for mapped counters */
 static struct pmu paicrypt = {
        .task_ctx_nr  = perf_hw_context,
+       .capabilities = PERF_PMU_CAP_SAMPLING,
        .event_init   = paicrypt_event_init,
        .add          = paicrypt_add,
        .del          = paicrypt_del,
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index d827473e7f87..1261f80c6d52 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -595,6 +595,7 @@ static const struct attribute_group *paiext_attr_groups[] = {
 /* Performance monitoring unit for mapped counters */
 static struct pmu paiext = {
        .task_ctx_nr  = perf_hw_context,
+       .capabilities = PERF_PMU_CAP_SAMPLING,
        .event_init   = paiext_event_init,
        .add          = paiext_add,
        .del          = paiext_del,
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 706127749c66..6ecea8e7b592 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1573,6 +1573,7 @@ static int sparc_pmu_commit_txn(struct pmu *pmu)
 }
 
 static struct pmu pmu = {
+       .capabilities   = PERF_PMU_CAP_SAMPLING,
        .pmu_enable     = sparc_pmu_enable,
        .pmu_disable    = sparc_pmu_disable,
        .event_init     = sparc_pmu_event_init,
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 95de309fc7d5..ed07d80b6fe0 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -768,6 +768,7 @@ static struct perf_ibs perf_ibs_fetch = {
        .pmu = {
                .task_ctx_nr    = perf_hw_context,
 
+               .capabilities   = PERF_PMU_CAP_SAMPLING,
                .event_init     = perf_ibs_init,
                .add            = perf_ibs_add,
                .del            = perf_ibs_del,
@@ -793,6 +794,7 @@ static struct perf_ibs perf_ibs_op = {
        .pmu = {
                .task_ctx_nr    = perf_hw_context,
 
+               .capabilities   = PERF_PMU_CAP_SAMPLING,
                .event_init     = perf_ibs_init,
                .add            = perf_ibs_add,
                .del            = perf_ibs_del,
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index eca5bb49aa85..72a4c43951ee 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1837,7 +1837,7 @@ static void __init pmu_check_apic(void)
         * sample via a hrtimer based software event):
         */
        pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
-
+       pmu.capabilities &= ~PERF_PMU_CAP_SAMPLING;
 }
 
 static struct attribute_group x86_pmu_format_group __ro_after_init = {
@@ -2698,6 +2698,8 @@ static bool x86_pmu_filter(struct pmu *pmu, int cpu)
 }
 
 static struct pmu pmu = {
+       .capabilities           = PERF_PMU_CAP_SAMPLING,
+
        .pmu_enable             = x86_pmu_enable,
        .pmu_disable            = x86_pmu_disable,
 
diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c
index 223f1d452310..b03a2feb0f92 100644
--- a/arch/xtensa/kernel/perf_event.c
+++ b/arch/xtensa/kernel/perf_event.c
@@ -397,6 +397,7 @@ irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
 }
 
 static struct pmu xtensa_pmu = {
+       .capabilities = PERF_PMU_CAP_SAMPLING,
        .pmu_enable = xtensa_pmu_enable,
        .pmu_disable = xtensa_pmu_disable,
        .event_init = xtensa_pmu_event_init,
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 2c1af3a0207c..72d8f38d0aa5 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -876,7 +876,8 @@ struct arm_pmu *armpmu_alloc(void)
                 * PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE events on a
                 * specific PMU.
                 */
-               .capabilities   = PERF_PMU_CAP_EXTENDED_REGS |
+               .capabilities   = PERF_PMU_CAP_SAMPLING |
+                                 PERF_PMU_CAP_EXTENDED_REGS |
                                  PERF_PMU_CAP_EXTENDED_HW_TYPE,
        };
 
diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c
index 118170a5cede..ab7a802cd0d6 100644
--- a/drivers/perf/arm_pmu_platform.c
+++ b/drivers/perf/arm_pmu_platform.c
@@ -109,6 +109,7 @@ static int pmu_parse_irqs(struct arm_pmu *pmu)
         */
        if (num_irqs == 0) {
                dev_warn(dev, "no irqs for PMU, sampling events not supported\n");
+               pmu->pmu.capabilities &= ~PERF_PMU_CAP_SAMPLING;
                pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
                cpumask_setall(&pmu->supported_cpus);
                return 0;
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index 369e77ad5f13..dbd52851f5c6 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -955,7 +955,8 @@ static int arm_spe_pmu_perf_init(struct arm_spe_pmu *spe_pmu)
        spe_pmu->pmu = (struct pmu) {
                .module = THIS_MODULE,
                .parent         = &spe_pmu->pdev->dev,
-               .capabilities   = PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
+               .capabilities   = PERF_PMU_CAP_SAMPLING |
+                                 PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
                .attr_groups    = arm_spe_pmu_attr_groups,
                /*
                 * We hitch a ride on the software context here, so that
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index 698de8ddf895..d185ea8c47ba 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -1361,6 +1361,8 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
                pr_info("Perf sampling/filtering is not supported as sscof extension is not available\n");
                pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
                pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
+       } else {
+               pmu->pmu.capabilities |= PERF_PMU_CAP_SAMPLING;
        }
 
        pmu->pmu.attr_groups = riscv_pmu_attr_groups;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4d439c24c901..bf2cfbeabba2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -294,7 +294,7 @@ struct perf_event_pmu_context;
 /**
  * pmu::capabilities flags
  */
-#define PERF_PMU_CAP_NO_INTERRUPT      0x0001
+#define PERF_PMU_CAP_SAMPLING          0x0001
 #define PERF_PMU_CAP_NO_NMI            0x0002
 #define PERF_PMU_CAP_AUX_NO_SG         0x0004
 #define PERF_PMU_CAP_EXTENDED_REGS     0x0008
@@ -305,6 +305,7 @@ struct perf_event_pmu_context;
 #define PERF_PMU_CAP_EXTENDED_HW_TYPE  0x0100
 #define PERF_PMU_CAP_AUX_PAUSE         0x0200
 #define PERF_PMU_CAP_AUX_PREFER_LARGE  0x0400
+#define PERF_PMU_CAP_NO_INTERRUPT      0x0800
 
 /**
  * pmu::scope
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8060c2857bb2..71b2a6730705 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4359,7 +4359,7 @@ perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle)
                        continue;
                if (!perf_pmu_ctx_is_active(pmu_ctx))
                        continue;
-               if (pmu_ctx->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT)
+               if (!(pmu_ctx->pmu->capabilities & PERF_PMU_CAP_SAMPLING))
                        continue;
 
                perf_pmu_disable(pmu_ctx->pmu);
@@ -10819,7 +10819,7 @@ static int perf_swevent_init(struct perf_event *event)
 static struct pmu perf_swevent = {
        .task_ctx_nr    = perf_sw_context,
 
-       .capabilities   = PERF_PMU_CAP_NO_NMI,
+       .capabilities   = PERF_PMU_CAP_SAMPLING | PERF_PMU_CAP_NO_NMI,
 
        .event_init     = perf_swevent_init,
        .add            = perf_swevent_add,
@@ -10861,6 +10861,7 @@ static int perf_tp_event_init(struct perf_event *event)
 static struct pmu perf_tracepoint = {
        .task_ctx_nr    = perf_sw_context,
 
+       .capabilities   = PERF_PMU_CAP_SAMPLING,
        .event_init     = perf_tp_event_init,
        .add            = perf_trace_add,
        .del            = perf_trace_del,
@@ -11066,6 +11067,7 @@ static struct pmu perf_kprobe = {
        .stop           = perf_swevent_stop,
        .read           = perf_swevent_read,
        .attr_groups    = kprobe_attr_groups,
+       .capabilities   = PERF_PMU_CAP_SAMPLING,
 };
 
 static int perf_kprobe_event_init(struct perf_event *event)
@@ -11125,6 +11127,7 @@ static struct pmu perf_uprobe = {
        .stop           = perf_swevent_stop,
        .read           = perf_swevent_read,
        .attr_groups    = uprobe_attr_groups,
+       .capabilities   = PERF_PMU_CAP_SAMPLING,
 };
 
 static int perf_uprobe_event_init(struct perf_event *event)
@@ -11899,7 +11902,7 @@ static int cpu_clock_event_init(struct perf_event *event)
 static struct pmu perf_cpu_clock = {
        .task_ctx_nr    = perf_sw_context,
 
-       .capabilities   = PERF_PMU_CAP_NO_NMI,
+       .capabilities   = PERF_PMU_CAP_SAMPLING | PERF_PMU_CAP_NO_NMI,
        .dev            = PMU_NULL_DEV,
 
        .event_init     = cpu_clock_event_init,
@@ -11982,7 +11985,7 @@ static int task_clock_event_init(struct perf_event *event)
 static struct pmu perf_task_clock = {
        .task_ctx_nr    = perf_sw_context,
 
-       .capabilities   = PERF_PMU_CAP_NO_NMI,
+       .capabilities   = PERF_PMU_CAP_SAMPLING | PERF_PMU_CAP_NO_NMI,
        .dev            = PMU_NULL_DEV,
 
        .event_init     = task_clock_event_init,
@@ -13476,11 +13479,10 @@ SYSCALL_DEFINE5(perf_event_open,
                goto err_task;
        }
 
-       if (is_sampling_event(event)) {
-               if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
-                       err = -EOPNOTSUPP;
-                       goto err_alloc;
-               }
+       if (is_sampling_event(event) &&
+           !(event->pmu->capabilities & PERF_PMU_CAP_SAMPLING)) {
+               err = -EOPNOTSUPP;
+               goto err_alloc;
        }
 
        /*
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 8ec2cb688903..604be7d7aecf 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -996,6 +996,7 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags)
 static struct pmu perf_breakpoint = {
        .task_ctx_nr    = perf_sw_context, /* could eventually get its own */
 
+       .capabilities   = PERF_PMU_CAP_SAMPLING,
        .event_init     = hw_breakpoint_event_init,
        .add            = hw_breakpoint_add,
        .del            = hw_breakpoint_del,
-- 
2.39.2.101.g768bb238c484.dirty

