On Mon, Jan 09, 2017 at 09:33:47PM -0600, Suravee Suthikulpanit wrote: > This patch adds multi-IOMMU support for perf by exposing > an AMD IOMMU PMU for each IOMMU found in the system via: > > /sys/device/amd_iommu_x /* where x is the IOMMU index. */
Straight into the top-level devices hierarchy? Why not add a proper hierarchy to /sys/devices/system/iommu/ ... ? Jörg, don't you have a sane iommu hierarchy already in sysfs? > This allows users to specify different events to be programed > onto performance counters of each IOMMU. > > Cc: Peter Zijlstra <pet...@infradead.org> > Cc: Borislav Petkov <b...@alien8.de> > Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpa...@amd.com> > --- > arch/x86/events/amd/iommu.c | 119 > ++++++++++++++++++++++++-------------------- > 1 file changed, 64 insertions(+), 55 deletions(-) > > diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c > index 2403c78..5fd97b5 100644 > --- a/arch/x86/events/amd/iommu.c > +++ b/arch/x86/events/amd/iommu.c > @@ -35,10 +35,13 @@ > #define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL) > #define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL) > > -static struct perf_amd_iommu __perf_iommu; > +#define PERF_AMD_IOMMU_NAME_SZ 16 > > struct perf_amd_iommu { > + struct list_head list; > struct pmu pmu; > + uint idx; This uint thing has crept in: $ git grep "uint " arch/x86/ arch/x86/crypto/crc32-pclmul_asm.S:101: * uint crc32_pclmul_le_16(unsigned char const *buffer, arch/x86/crypto/crc32-pclmul_asm.S:102: * size_t len, uint crc32) arch/x86/events/amd/iommu.h:32:extern u8 amd_iommu_pc_get_max_banks(uint idx); arch/x86/events/amd/iommu.h:34:extern u8 amd_iommu_pc_get_max_counters(uint idx); arch/x86/events/amd/iommu.h:36:extern int amd_iommu_pc_set_reg(uint idx, u16 devid, u8 bank, u8 cntr, arch/x86/events/amd/iommu.h:39:extern int amd_iommu_pc_set_counter(uint idx, u8 bank, u8 cntr, u64 *value); arch/x86/events/amd/iommu.h:41:extern int amd_iommu_pc_get_counter(uint idx, u8 bank, u8 cntr, u64 *value); "unsigned int" please. > + char name[PERF_AMD_IOMMU_NAME_SZ]; > u8 max_banks; > u8 max_counters; > u64 cntr_assign_mask; > @@ -46,6 +49,8 @@ struct perf_amd_iommu { > const struct attribute_group *attr_groups[4]; > }; > > +LIST_HEAD(perf_amd_iommu_list); static > + > #define format_group attr_groups[0] > #define cpumask_group attr_groups[1] > #define events_group attr_groups[2] > @@ -204,8 +209,7 @@ static int clear_avail_iommu_bnk_cntr(struct > perf_amd_iommu *perf_iommu, > static int perf_iommu_event_init(struct perf_event *event) > { > struct hw_perf_event *hwc = &event->hw; > - struct perf_amd_iommu *perf_iommu; > - u64 config, config1; > + struct perf_amd_iommu *pi = container_of(event->pmu, struct > perf_amd_iommu, pmu); Do this deref when you actually need it: struct perf_amd_iommu *pi; because you might exit earlier and end up doing it for nothing. > > /* test the event attr type check for PMU enumeration */ > if (event->attr.type != event->pmu->type) > @@ -227,27 +231,17 @@ static int perf_iommu_event_init(struct perf_event > *event) > if (event->cpu < 0) > return -EINVAL; > > - perf_iommu = &__perf_iommu; > - > - if (event->pmu != &perf_iommu->pmu) > - return -ENOENT; > - > - if (perf_iommu) { > - config = event->attr.config; > - config1 = event->attr.config1; > - } else { > - return -EINVAL; > - } > - > /* update the hw_perf_event struct with the iommu config data */ > - hwc->config = config; > - hwc->extra_reg.config = config1; ---> here: pi = container_of(event->pmu, struct perf_amd_iommu, pmu); > + hwc->idx = pi->idx; > + hwc->config = event->attr.config; > + hwc->extra_reg.config = event->attr.config1; Align vertically on the = sign. > > return 0; > } > > static void perf_iommu_enable_event(struct perf_event *ev) > { > + struct hw_perf_event *hwc = &ev->hw; > u8 csource = _GET_CSOURCE(ev); > u16 devid = _GET_DEVID(ev); > u8 bank = _GET_BANK(ev); ... > static void perf_iommu_disable_event(struct perf_event *event) > { > + struct hw_perf_event *hwc = &event->hw; > u64 reg = 0ULL; > > - amd_iommu_pc_set_reg(0, _GET_DEVID(event), _GET_BANK(event), > + amd_iommu_pc_set_reg(hwc->idx, _GET_DEVID(event), _GET_BANK(event), > _GET_CNTR(event), IOMMU_PC_COUNTER_SRC_REG, ®); > } > > @@ -301,7 +296,7 @@ static void perf_iommu_start(struct perf_event *event, > int flags) > > val = local64_read(&hwc->prev_count); > > - amd_iommu_pc_set_counter(0, _GET_BANK(event), _GET_CNTR(event), &val); > + amd_iommu_pc_set_counter(hwc->idx, _GET_BANK(event), _GET_CNTR(event), > &val); > enable: > perf_iommu_enable_event(event); > perf_event_update_userpage(event); > @@ -314,7 +309,7 @@ static void perf_iommu_read(struct perf_event *event) > s64 delta; > struct hw_perf_event *hwc = &event->hw; > > - if (amd_iommu_pc_get_counter(0, _GET_BANK(event), _GET_CNTR(event), > &cnt)) > + if (amd_iommu_pc_get_counter(hwc->idx, _GET_BANK(event), > _GET_CNTR(event), &cnt)) > return; > > /* IOMMU pc counter register is only 48 bits */ > @@ -417,14 +412,20 @@ static __init int _init_events_attrs(struct > perf_amd_iommu *perf_iommu) > > static __init void amd_iommu_pc_exit(void) > { > - if (__perf_iommu.events_group != NULL) { > - kfree(__perf_iommu.events_group); > - __perf_iommu.events_group = NULL; > + struct perf_amd_iommu *pi, *next; > + > + list_for_each_entry_safe(pi, next, &perf_amd_iommu_list, list) { > + list_del(&pi->list); > + > + kfree(pi->events_group); > + pi->events_group = NULL; Why that assignment if you're going to free it in the next line? > + > + kfree(pi); > } > } > > -static __init int _init_perf_amd_iommu( > - struct perf_amd_iommu *perf_iommu, char *name) > +static __init int > +init_one_perf_amd_iommu(struct perf_amd_iommu *perf_iommu, uint idx) > { > int ret; > > @@ -441,54 +442,62 @@ static __init int _init_perf_amd_iommu( > if (_init_events_attrs(perf_iommu) != 0) > pr_err("Only support raw events.\n"); Here's also /* Init cpumask attributes to only core 0 */ cpumask_set_cpu(0, &iommu_cpumask); is that still correct for all IOMMUs? > > - perf_iommu->max_banks = amd_iommu_pc_get_max_banks(0); > - perf_iommu->max_counters = amd_iommu_pc_get_max_counters(0); > + snprintf(perf_iommu->name, PERF_AMD_IOMMU_NAME_SZ, "amd_iommu_%u", idx); Do this ^^ > + perf_iommu->idx = idx; > + perf_iommu->max_banks = amd_iommu_pc_get_max_banks(idx); > + perf_iommu->max_counters = amd_iommu_pc_get_max_counters(idx); > if (!perf_iommu->max_banks || !perf_iommu->max_counters) > return -EINVAL; ... here, after the test has passed. > /* Init null attributes */ > perf_iommu->null_group = NULL; No need for this - you're getting zeroed memory from kzalloc(). > + /* Setting up PMU */ > + perf_iommu->pmu.event_init = perf_iommu_event_init, > + perf_iommu->pmu.add = perf_iommu_add, > + perf_iommu->pmu.del = perf_iommu_del, > + perf_iommu->pmu.start = perf_iommu_start, > + perf_iommu->pmu.stop = perf_iommu_stop, > + perf_iommu->pmu.read = perf_iommu_read, > + perf_iommu->pmu.task_ctx_nr = perf_invalid_context; > perf_iommu->pmu.attr_groups = perf_iommu->attr_groups; Align vertically on the = > - ret = perf_pmu_register(&perf_iommu->pmu, name, -1); > + ret = perf_pmu_register(&perf_iommu->pmu, perf_iommu->name, -1); > if (ret) { > pr_err("Error initializing AMD IOMMU perf counters.\n"); > amd_iommu_pc_exit(); > } else { > - pr_info("perf: amd_iommu: Detected. (%d banks, %d > counters/bank)\n", > - amd_iommu_pc_get_max_banks(0), > - amd_iommu_pc_get_max_counters(0)); > + pr_info("Detected %s, w/ %d banks, %d counters/bank\n", I'd prefer: "Detected AMD IOMMU #%d (%d banks, %d counters/bank)" where the "#%d" is idx. > static __init int amd_iommu_pc_init(void) > { > + uint i; > + > /* Make sure the IOMMU PC resource is available */ > if (!amd_iommu_pc_supported()) > return -ENODEV; > > - _init_perf_amd_iommu(&__perf_iommu, "amd_iommu"); > + for (i = 0 ; i < amd_iommu_get_num_iommus(); i++) { > + int ret; > + struct perf_amd_iommu *pi; > + > + pi = kzalloc(sizeof(struct perf_amd_iommu), GFP_KERNEL); > + if (!pi) > + return -ENOMEM; > + > + ret = init_one_perf_amd_iommu(pi, i); > + if (ret) > + return ret; You're leaking here memory if some IOMMUs fail and that function returns -EINVAL from the max_banks and counters test above. So you need to unwind all that setup init_one_perf_amd_iommu() did for the iommus from 0 up to i - 1; IOW; you need to move the call to amd_iommu_pc_exit() here and not in the init function. That'll be all for now. :-) Thanks. -- Regards/Gruss, Boris. Good mailing practices for 400: avoid top-posting and trim the reply. _______________________________________________ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu