drivers/perf/arm_smmuv3_pmu.c | 215 ++++++++++++++++++++++++----------
1 file changed, 152 insertions(+), 63 deletions(-)
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index d704eccc548f..8daa7ac6e801 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -47,8 +47,11 @@
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/msi.h>
+#include <linux/mutex.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
+#include <linux/rculist.h>
+#include <linux/refcount.h>
#include <linux/smp.h>
#include <linux/sysfs.h>
#include <linux/types.h>
@@ -98,13 +101,24 @@
static int cpuhp_state_num;
-struct smmu_pmu {
+static LIST_HEAD(smmu_pmu_affinities);
+static DEFINE_MUTEX(smmu_pmu_affinity_lock);
+
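+/*
+ * PMCG instances that share an interrupt line also share one of these,
+ * so a single handler and a single home CPU can serve them all.
+ */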
+struct smmu_pmu_affinity {
struct hlist_node node;
+ struct list_head affinity_list;
+ struct list_head instance_list;
+ refcount_t refcount;
+ unsigned int irq;
+ unsigned int cpu;
+};
+
+struct smmu_pmu {
struct perf_event *events[SMMU_PMCG_MAX_COUNTERS];
DECLARE_BITMAP(used_counters, SMMU_PMCG_MAX_COUNTERS);
DECLARE_BITMAP(supported_events, SMMU_PMCG_ARCH_MAX_EVENTS);
- unsigned int irq;
- unsigned int on_cpu;
+ struct smmu_pmu_affinity *affinity;
+ struct list_head affinity_list;
struct pmu pmu;
unsigned int num_counters;
struct device *dev;
@@ -394,7 +408,7 @@ static int smmu_pmu_event_init(struct perf_event *event)
* Ensure all events are on the same cpu so all events are in the
* same cpu context, to avoid races on pmu_enable etc.
*/
- event->cpu = smmu_pmu->on_cpu;
+ event->cpu = smmu_pmu->affinity->cpu;
return 0;
}
@@ -478,9 +492,10 @@ static ssize_t smmu_pmu_cpumask_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct smmu_pmu_affinity *aff = to_smmu_pmu(pmu)->affinity;
- return cpumap_print_to_pagebuf(true, buf, cpumask_of(smmu_pmu->on_cpu));
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(aff->cpu));
}
static struct device_attribute smmu_pmu_cpumask_attr =
@@ -584,50 +599,140 @@ static const struct attribute_group *smmu_pmu_attr_grps[] = {
static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
- struct smmu_pmu *smmu_pmu;
+ struct smmu_pmu_affinity *aff;
+ struct smmu_pmu *pmu;
unsigned int target;
- smmu_pmu = hlist_entry_safe(node, struct smmu_pmu, node);
- if (cpu != smmu_pmu->on_cpu)
+ aff = hlist_entry_safe(node, struct smmu_pmu_affinity, node);
+ if (cpu != aff->cpu)
return 0;
target = cpumask_any_but(cpu_online_mask, cpu);
if (target >= nr_cpu_ids)
return 0;
- perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target);
- smmu_pmu->on_cpu = target;
- WARN_ON(irq_set_affinity_hint(smmu_pmu->irq, cpumask_of(target)));
+ /* We're only reading, but this isn't the place to be involving RCU */
+ mutex_lock(&smmu_pmu_affinity_lock);
+ list_for_each_entry(pmu, &aff->instance_list, affinity_list)
+ perf_pmu_migrate_context(&pmu->pmu, aff->cpu, target);
+ mutex_unlock(&smmu_pmu_affinity_lock);
+
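+ /* Migrate all contexts first, then let the IRQ follow to the new CPU */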
+ WARN_ON(irq_set_affinity_hint(aff->irq, cpumask_of(target)));
+ aff->cpu = target;
return 0;
}
static irqreturn_t smmu_pmu_handle_irq(int irq_num, void *data)
{
- struct smmu_pmu *smmu_pmu = data;
- u64 ovsr;
- unsigned int idx;
+ struct smmu_pmu_affinity *aff = data;
+ struct smmu_pmu *smmu_pmu;
+ irqreturn_t ret = IRQ_NONE;
+
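+ /*
+ * The line may be shared, so poll every instance hooked up to it; RCU
+ * guards the walk against instances coming and going.
+ */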
+ rcu_read_lock();
+ list_for_each_entry_rcu(smmu_pmu, &aff->instance_list, affinity_list) {
+ unsigned int idx;
+ u64 ovsr = readq(smmu_pmu->reloc_base + SMMU_PMCG_OVSSET0);
- ovsr = readq(smmu_pmu->reloc_base + SMMU_PMCG_OVSSET0);
- if (!ovsr)
- return IRQ_NONE;
+ if (!ovsr)
+ continue;
- writeq(ovsr, smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0);
+ writeq(ovsr, smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0);
- for_each_set_bit(idx, (unsigned long *)&ovsr, smmu_pmu->num_counters) {
- struct perf_event *event = smmu_pmu->events[idx];
- struct hw_perf_event *hwc;
+ for_each_set_bit(idx, (unsigned long *)&ovsr,
+ smmu_pmu->num_counters) {
+ struct perf_event *event = smmu_pmu->events[idx];
- if (WARN_ON_ONCE(!event))
- continue;
+ if (WARN_ON_ONCE(!event))
+ continue;
+
+ smmu_pmu_event_update(event);
+ smmu_pmu_set_period(smmu_pmu, &event->hw);
+ }
+ ret = IRQ_HANDLED;
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static struct smmu_pmu_affinity *__smmu_pmu_get_affinity(int irq)
+{
+ struct smmu_pmu_affinity *aff;
+ int ret;
+
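+ /* Reuse an existing affinity if another PMCG already owns this IRQ */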
+ list_for_each_entry(aff, &smmu_pmu_affinities, affinity_list)
+ if (aff->irq == irq && refcount_inc_not_zero(&aff->refcount))
+ return aff;
+
+ aff = kzalloc(sizeof(*aff), GFP_KERNEL);
+ if (!aff)
+ return ERR_PTR(-ENOMEM);
+
+ /* Pick one CPU to be the preferred one to use */
+ aff->cpu = raw_smp_processor_id();
+ refcount_set(&aff->refcount, 1);
+
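+ /* The IRQ is requested once per line; instances attach via instance_list */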
+ ret = request_irq(irq, smmu_pmu_handle_irq,
+ IRQF_NOBALANCING | IRQF_NO_THREAD,
+ "smmuv3-pmu", aff);
+ if (ret)
+ goto out_free_aff;
+
+ ret = irq_set_affinity_hint(irq, cpumask_of(aff->cpu));
+ if (ret)
+ goto out_free_irq;
+
+ ret = cpuhp_state_add_instance_nocalls(cpuhp_state_num, &aff->node);
+ if (ret)
+ goto out_free_irq;
+
+ list_add(&aff->affinity_list, &smmu_pmu_affinities);
+ return aff;
+
+out_free_irq:
+ free_irq(irq, aff);
+out_free_aff:
+ kfree(aff);
+ return ERR_PTR(ret);
+}
+
+static int smmu_pmu_get_affinity(struct smmu_pmu *pmu, int irq)
+{
+ struct smmu_pmu_affinity *aff;
+
+ mutex_lock(&smmu_pmu_affinity_lock);
+ aff = __smmu_pmu_get_affinity(irq);
+ mutex_unlock(&smmu_pmu_affinity_lock);
+
+ if (IS_ERR(aff))
+ return PTR_ERR(aff);
+
+ pmu->affinity = aff;
+ mutex_lock(&smmu_pmu_affinity_lock);
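+ /* Publish the instance so the shared IRQ handler can start seeing it */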
+ list_add_rcu(&pmu->affinity_list, &aff->instance_list);
+ mutex_unlock(&smmu_pmu_affinity_lock);
+
+ return 0;
+}
+
+static void smmu_pmu_put_affinity(struct smmu_pmu *pmu)
+{
+ struct smmu_pmu_affinity *aff = pmu->affinity;
- smmu_pmu_event_update(event);
- hwc = &event->hw;
+ mutex_lock(&smmu_pmu_affinity_lock);
+ list_del_rcu(&pmu->affinity_list);
- smmu_pmu_set_period(smmu_pmu, hwc);
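+ /* The last user of this IRQ line tears everything down */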
+ if (!refcount_dec_and_test(&aff->refcount)) {
+ mutex_unlock(&smmu_pmu_affinity_lock);
+ return;
}
- return IRQ_HANDLED;
+ list_del(&aff->affinity_list);
+ mutex_unlock(&smmu_pmu_affinity_lock);
+
+ free_irq(aff->irq, aff);
+ cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &aff->node);
+ kfree(aff);
}
static void smmu_pmu_free_msis(void *data)
@@ -652,7 +757,7 @@ static void smmu_pmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
pmu->reg_base + SMMU_PMCG_IRQ_CFG2);
}
-static void smmu_pmu_setup_msi(struct smmu_pmu *pmu)
+static int smmu_pmu_setup_msi(struct smmu_pmu *pmu)
{
struct msi_desc *desc;
struct device *dev = pmu->dev;
@@ -663,34 +768,34 @@ static void smmu_pmu_setup_msi(struct smmu_pmu *pmu)
/* MSI supported or not */
if (!(readl(pmu->reg_base + SMMU_PMCG_CFGR) & SMMU_PMCG_CFGR_MSI))
- return;
+ return 0;
ret = platform_msi_domain_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg);
if (ret) {
dev_warn(dev, "failed to allocate MSIs\n");
- return;
+ return ret;
}
desc = first_msi_entry(dev);
if (desc)
- pmu->irq = desc->irq;
+ ret = desc->irq;
/* Add callback to free MSIs on teardown */
devm_add_action(dev, smmu_pmu_free_msis, dev);
+ return ret;
}
static int smmu_pmu_setup_irq(struct smmu_pmu *pmu)
{
- unsigned long flags = IRQF_NOBALANCING | IRQF_SHARED | IRQF_NO_THREAD;
- int irq, ret = -ENXIO;
+ int irq;
- smmu_pmu_setup_msi(pmu);
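+ /* Prefer an MSI if the PMCG supports one, else fall back to the wired IRQ */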
+ irq = smmu_pmu_setup_msi(pmu);
+ if (irq <= 0)
+ irq = platform_get_irq(to_platform_device(pmu->dev), 0);
+ if (irq < 0)
+ return irq;
- irq = pmu->irq;
- if (irq)
- ret = devm_request_irq(pmu->dev, irq, smmu_pmu_handle_irq,
- flags, "smmuv3-pmu", pmu);
- return ret;
+ return smmu_pmu_get_affinity(pmu, irq);
}
static void smmu_pmu_reset(struct smmu_pmu *smmu_pmu)
@@ -730,7 +835,7 @@ static int smmu_pmu_probe(struct platform_device *pdev)
struct resource *res_0;
u32 cfgr, reg_size;
u64 ceid_64[2];
- int irq, err;
+ int err;
char *name;
struct device *dev = &pdev->dev;
@@ -771,10 +876,6 @@ static int smmu_pmu_probe(struct platform_device *pdev)
smmu_pmu->reloc_base = smmu_pmu->reg_base;
}
- irq = platform_get_irq(pdev, 0);
- if (irq > 0)
- smmu_pmu->irq = irq;
-
ceid_64[0] = readq_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CEID0);
ceid_64[1] = readq_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CEID1);
bitmap_from_arr32(smmu_pmu->supported_events, (u32 *)ceid_64,
@@ -789,12 +890,6 @@ static int smmu_pmu_probe(struct platform_device *pdev)
smmu_pmu_reset(smmu_pmu);
- err = smmu_pmu_setup_irq(smmu_pmu);
- if (err) {
- dev_err(dev, "Setup irq failed, PMU @%pa\n", &res_0->start);
- return err;
- }
-
name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "smmuv3_pmcg_%llx",
(res_0->start) >> SMMU_PMCG_PA_SHIFT);
if (!name) {
@@ -804,16 +899,9 @@ static int smmu_pmu_probe(struct platform_device *pdev)
smmu_pmu_get_acpi_options(smmu_pmu);
- /* Pick one CPU to be the preferred one to use */
- smmu_pmu->on_cpu = raw_smp_processor_id();
- WARN_ON(irq_set_affinity_hint(smmu_pmu->irq,
- cpumask_of(smmu_pmu->on_cpu)));
-
- err = cpuhp_state_add_instance_nocalls(cpuhp_state_num,
- &smmu_pmu->node);
+ err = smmu_pmu_setup_irq(smmu_pmu);
if (err) {
- dev_err(dev, "Error %d registering hotplug, PMU @%pa\n",
- err, &res_0->start);
+ dev_err(dev, "Setup irq failed, PMU @%pa\n", &res_0->start);
return err;
}
@@ -832,7 +920,8 @@ static int smmu_pmu_probe(struct platform_device *pdev)
return 0;
out_unregister:
- cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
+ smmu_pmu_put_affinity(smmu_pmu);
+ synchronize_rcu();
return err;
}
@@ -840,9 +929,9 @@ static int smmu_pmu_remove(struct platform_device *pdev)
{
struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev);
+ smmu_pmu_put_affinity(smmu_pmu);
+ /* perf will synchronise RCU before devres can free smmu_pmu */
perf_pmu_unregister(&smmu_pmu->pmu);
- cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
-
return 0;
}