>-----Original Message----- >From: Konstantin Ananyev <konstantin.anan...@huawei.com> >Sent: Monday, February 27, 2023 9:53 PM >To: Tomasz Duszynski <tduszyn...@marvell.com>; Konstantin Ananyev ><konstantin.v.anan...@yandex.ru>; >dev@dpdk.org >Subject: RE: [EXT] Re: [PATCH v11 1/4] lib: add generic support for reading >PMU events > > > >> >> Add support for programming PMU counters and reading their values >> >> in runtime bypassing kernel completely. >> >> >> >> This is especially useful in cases where CPU cores are isolated i.e >> >> run dedicated tasks. In such cases one cannot use standard perf >> >> utility without sacrificing latency and performance. >> >> >> >> Signed-off-by: Tomasz Duszynski <tduszyn...@marvell.com> >> >> Acked-by: Morten Brørup <m...@smartsharesystems.com> >> > >> >Few more comments/questions below. >> > >> > >> >> diff --git a/lib/pmu/rte_pmu.c b/lib/pmu/rte_pmu.c new file mode >> >> 100644 index 0000000000..950f999cb7 >> >> --- /dev/null >> >> +++ b/lib/pmu/rte_pmu.c >> >> @@ -0,0 +1,460 @@ >> >> +/* SPDX-License-Identifier: BSD-3-Clause >> >> + * Copyright(C) 2023 Marvell International Ltd. >> >> + */ >> >> + >> >> +#include <ctype.h> >> >> +#include <dirent.h> >> >> +#include <errno.h> >> >> +#include <regex.h> >> >> +#include <stdlib.h> >> >> +#include <string.h> >> >> +#include <sys/ioctl.h> >> >> +#include <sys/mman.h> >> >> +#include <sys/queue.h> >> >> +#include <sys/syscall.h> >> >> +#include <unistd.h> >> >> + >> >> +#include <rte_atomic.h> >> >> +#include <rte_per_lcore.h> >> >> +#include <rte_pmu.h> >> >> +#include <rte_spinlock.h> >> >> +#include <rte_tailq.h> >> >> + >> >> +#include "pmu_private.h" >> >> + >> >> +#define EVENT_SOURCE_DEVICES_PATH "/sys/bus/event_source/devices" >> >> + >> >> +#define GENMASK_ULL(h, l) ((~0ULL - (1ULL << (l)) + 1) & (~0ULL >> >> >> +((64 - 1 - (h))))) #define FIELD_PREP(m, v) (((uint64_t)(v) << >> >> +(__builtin_ffsll(m) - 1)) & (m)) >> >> + >> >> +RTE_DEFINE_PER_LCORE(struct rte_pmu_event_group, _event_group); >> >> +struct rte_pmu rte_pmu; >> >> + >> >> +/* >> >> + * Following __rte_weak functions provide default no-op. >> >> +Architectures should override them if >> >> + * necessary. >> >> + */ >> >> + >> >> +int >> >> +__rte_weak pmu_arch_init(void) >> >> +{ >> >> + return 0; >> >> +} >> >> + >> >> +void >> >> +__rte_weak pmu_arch_fini(void) >> >> +{ >> >> +} >> >> + >> >> +void >> >> +__rte_weak pmu_arch_fixup_config(uint64_t __rte_unused config[3]) >> >> +{ } >> >> + >> >> +static int >> >> +get_term_format(const char *name, int *num, uint64_t *mask) { >> >> + char path[PATH_MAX]; >> >> + char *config = NULL; >> >> + int high, low, ret; >> >> + FILE *fp; >> >> + >> >> + *num = *mask = 0; >> >> + snprintf(path, sizeof(path), EVENT_SOURCE_DEVICES_PATH "/%s/format/%s", >> >> rte_pmu.name, >name); >> >> + fp = fopen(path, "r"); >> >> + if (fp == NULL) >> >> + return -errno; >> >> + >> >> + errno = 0; >> >> + ret = fscanf(fp, "%m[^:]:%d-%d", &config, &low, &high); >> >> + if (ret < 2) { >> >> + ret = -ENODATA; >> >> + goto out; >> >> + } >> >> + if (errno) { >> >> + ret = -errno; >> >> + goto out; >> >> + } >> >> + >> >> + if (ret == 2) >> >> + high = low; >> >> + >> >> + *mask = GENMASK_ULL(high, low); >> >> + /* Last digit should be [012]. If last digit is missing 0 is implied. */ >> >> + *num = config[strlen(config) - 1]; >> >> + *num = isdigit(*num) ? 
*num - '0' : 0; >> >> + >> >> + ret = 0; >> >> +out: >> >> + free(config); >> >> + fclose(fp); >> >> + >> >> + return ret; >> >> +} >> >> + >> >> +static int >> >> +parse_event(char *buf, uint64_t config[3]) { >> >> + char *token, *term; >> >> + int num, ret, val; >> >> + uint64_t mask; >> >> + >> >> + config[0] = config[1] = config[2] = 0; >> >> + >> >> + token = strtok(buf, ","); >> >> + while (token) { >> >> + errno = 0; >> >> + /* <term>=<value> */ >> >> + ret = sscanf(token, "%m[^=]=%i", &term, &val); >> >> + if (ret < 1) >> >> + return -ENODATA; >> >> + if (errno) >> >> + return -errno; >> >> + if (ret == 1) >> >> + val = 1; >> >> + >> >> + ret = get_term_format(term, &num, &mask); >> >> + free(term); >> >> + if (ret) >> >> + return ret; >> >> + >> >> + config[num] |= FIELD_PREP(mask, val); >> >> + token = strtok(NULL, ","); >> >> + } >> >> + >> >> + return 0; >> >> +} >> >> + >> >> +static int >> >> +get_event_config(const char *name, uint64_t config[3]) { >> >> + char path[PATH_MAX], buf[BUFSIZ]; >> >> + FILE *fp; >> >> + int ret; >> >> + >> >> + snprintf(path, sizeof(path), EVENT_SOURCE_DEVICES_PATH "/%s/events/%s", >> >> rte_pmu.name, >name); >> >> + fp = fopen(path, "r"); >> >> + if (fp == NULL) >> >> + return -errno; >> >> + >> >> + ret = fread(buf, 1, sizeof(buf), fp); >> >> + if (ret == 0) { >> >> + fclose(fp); >> >> + >> >> + return -EINVAL; >> >> + } >> >> + fclose(fp); >> >> + buf[ret] = '\0'; >> >> + >> >> + return parse_event(buf, config); >> >> +} >> >> + >> >> +static int >> >> +do_perf_event_open(uint64_t config[3], int group_fd) { >> >> + struct perf_event_attr attr = { >> >> + .size = sizeof(struct perf_event_attr), >> >> + .type = PERF_TYPE_RAW, >> >> + .exclude_kernel = 1, >> >> + .exclude_hv = 1, >> >> + .disabled = 1, >> >> + }; >> >> + >> >> + pmu_arch_fixup_config(config); >> >> + >> >> + attr.config = config[0]; >> >> + attr.config1 = config[1]; >> >> + attr.config2 = config[2]; >> >> + >> >> + return syscall(SYS_perf_event_open, &attr, 0, -1, group_fd, 0); } >> >> + >> >> +static int >> >> +open_events(struct rte_pmu_event_group *group) { >> >> + struct rte_pmu_event *event; >> >> + uint64_t config[3]; >> >> + int num = 0, ret; >> >> + >> >> + /* group leader gets created first, with fd = -1 */ >> >> + group->fds[0] = -1; >> >> + >> >> + TAILQ_FOREACH(event, &rte_pmu.event_list, next) { >> >> + ret = get_event_config(event->name, config); >> >> + if (ret) >> >> + continue; >> >> + >> >> + ret = do_perf_event_open(config, group->fds[0]); >> >> + if (ret == -1) { >> >> + ret = -errno; >> >> + goto out; >> >> + } >> >> + >> >> + group->fds[event->index] = ret; >> >> + num++; >> >> + } >> >> + >> >> + return 0; >> >> +out: >> >> + for (--num; num >= 0; num--) { >> >> + close(group->fds[num]); >> >> + group->fds[num] = -1; >> >> + } >> >> + >> >> + >> >> + return ret; >> >> +} >> >> + >> >> +static int >> >> +mmap_events(struct rte_pmu_event_group *group) { >> >> + long page_size = sysconf(_SC_PAGE_SIZE); >> >> + unsigned int i; >> >> + void *addr; >> >> + int ret; >> >> + >> >> + for (i = 0; i < rte_pmu.num_group_events; i++) { >> >> + addr = mmap(0, page_size, PROT_READ, MAP_SHARED, group->fds[i], >> >> 0); >> >> + if (addr == MAP_FAILED) { >> >> + ret = -errno; >> >> + goto out; >> >> + } >> >> + >> >> + group->mmap_pages[i] = addr; >> >> + if (!group->mmap_pages[i]->cap_user_rdpmc) { >> >> + ret = -EPERM; >> >> + goto out; >> >> + } >> >> + } >> >> + >> >> + return 0; >> >> +out: >> >> + for (; i; i--) { >> >> + munmap(group->mmap_pages[i - 1], page_size); >> 
>> + group->mmap_pages[i - 1] = NULL; >> >> + } >> >> + >> >> + return ret; >> >> +} >> >> + >> >> +static void >> >> +cleanup_events(struct rte_pmu_event_group *group) { >> >> + unsigned int i; >> >> + >> >> + if (group->fds[0] != -1) >> >> + ioctl(group->fds[0], PERF_EVENT_IOC_DISABLE, >> >> +PERF_IOC_FLAG_GROUP); >> >> + >> >> + for (i = 0; i < rte_pmu.num_group_events; i++) { >> >> + if (group->mmap_pages[i]) { >> >> + munmap(group->mmap_pages[i], sysconf(_SC_PAGE_SIZE)); >> >> + group->mmap_pages[i] = NULL; >> >> + } >> >> + >> >> + if (group->fds[i] != -1) { >> >> + close(group->fds[i]); >> >> + group->fds[i] = -1; >> >> + } >> >> + } >> >> + >> >> + group->enabled = false; >> >> +} >> >> + >> >> +int >> >> +__rte_pmu_enable_group(void) >> >> +{ >> >> + struct rte_pmu_event_group *group = &RTE_PER_LCORE(_event_group); >> >> + int ret; >> >> + >> >> + if (rte_pmu.num_group_events == 0) >> >> + return -ENODEV; >> >> + >> >> + ret = open_events(group); >> >> + if (ret) >> >> + goto out; >> >> + >> >> + ret = mmap_events(group); >> >> + if (ret) >> >> + goto out; >> >> + >> >> + if (ioctl(group->fds[0], PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == >> >> -1) { >> >> + ret = -errno; >> >> + goto out; >> >> + } >> >> + >> >> + if (ioctl(group->fds[0], PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == >> >> -1) { >> >> + ret = -errno; >> >> + goto out; >> >> + } >> >> + >> >> + rte_spinlock_lock(&rte_pmu.lock); >> >> + TAILQ_INSERT_TAIL(&rte_pmu.event_group_list, group, next); >> > >> >Hmm.. so we insert pointer to TLS variable into the global list? >> >Wonder what would happen if that thread get terminated? >> >> Nothing special. Any pointers to that thread-local in that thread are >> invalided. >> >> >Can memory from its TLS block get re-used (by other thread or for other >> >purposes)? >> > >> >> Why would any other thread reuse that? >> Eventually main thread will need that data to do the cleanup. > >I understand that main thread would need to access that data. >I am not sure that it would be able to. >Imagine thread calls rte_pmu_read(...) and then terminates, while program >continues to run. >As I understand address of its RTE_PER_LCORE(_event_group) will still remain in >rte_pmu.event_group_list, even if it is probably not valid any more. >
Okay, got your point. In DPDK that will not happen - we do not spawn/kill lcores at runtime. In other scenarios such an approach will not work because once a thread terminates its per-thread data becomes invalid. >> > >> >> + rte_spinlock_unlock(&rte_pmu.lock); >> >> + group->enabled = true; >> >> + >> >> + return 0; >> >> + >> >> +out: >> >> + cleanup_events(group); >> >> + >> >> + return ret; >> >> +} >> >> + >> >> +static int >> >> +scan_pmus(void) >> >> +{ >> >> + char path[PATH_MAX]; >> >> + struct dirent *dent; >> >> + const char *name; >> >> + DIR *dirp; >> >> + >> >> + dirp = opendir(EVENT_SOURCE_DEVICES_PATH); >> >> + if (dirp == NULL) >> >> + return -errno; >> >> + >> >> + while ((dent = readdir(dirp))) { >> >> + name = dent->d_name; >> >> + if (name[0] == '.') >> >> + continue; >> >> + >> >> + /* sysfs entry should either contain cpus or be a cpu */ >> >> + if (!strcmp(name, "cpu")) >> >> + break; >> >> + >> >> + snprintf(path, sizeof(path), EVENT_SOURCE_DEVICES_PATH >> >> "/%s/cpus", name); >> >> + if (access(path, F_OK) == 0) >> >> + break; >> >> + } >> >> + >> >> + if (dent) { >> >> + rte_pmu.name = strdup(name); >> >> + if (rte_pmu.name == NULL) { >> >> + closedir(dirp); >> >> + >> >> + return -ENOMEM; >> >> + } >> >> + } >> >> + >> >> + closedir(dirp); >> >> + >> >> + return rte_pmu.name ? 0 : -ENODEV; } >> >> + >> >> +static struct rte_pmu_event * >> >> +new_event(const char *name) >> >> +{ >> >> + struct rte_pmu_event *event; >> >> + >> >> + event = calloc(1, sizeof(*event)); >> >> + if (event == NULL) >> >> + goto out; >> >> + >> >> + event->name = strdup(name); >> >> + if (event->name == NULL) { >> >> + free(event); >> >> + event = NULL; >> >> + } >> >> + >> >> +out: >> >> + return event; >> >> +} >> >> + >> >> +static void >> >> +free_event(struct rte_pmu_event *event) { >> >> + free(event->name); >> >> + free(event); >> >> +} >> >> + >> >> +int >> >> +rte_pmu_add_event(const char *name) { >> >> + struct rte_pmu_event *event; >> >> + char path[PATH_MAX]; >> >> + >> >> + if (rte_pmu.name == NULL) >> >> + return -ENODEV; >> >> + >> >> + if (rte_pmu.num_group_events + 1 >= MAX_NUM_GROUP_EVENTS) >> >> + return -ENOSPC; >> >> + >> >> + snprintf(path, sizeof(path), EVENT_SOURCE_DEVICES_PATH "/%s/events/%s", >> >> rte_pmu.name, >name); >> >> + if (access(path, R_OK)) >> >> + return -ENODEV; >> >> + >> >> + TAILQ_FOREACH(event, &rte_pmu.event_list, next) { >> >> + if (!strcmp(event->name, name)) >> >> + return event->index; >> >> + continue; >> >> + } >> >> + >> >> + event = new_event(name); >> >> + if (event == NULL) >> >> + return -ENOMEM; >> >> + >> >> + event->index = rte_pmu.num_group_events++; >> >> + TAILQ_INSERT_TAIL(&rte_pmu.event_list, event, next); >> >> + >> >> + return event->index; >> >> +} >> >> + >> >> +int >> >> +rte_pmu_init(void) >> >> +{ >> >> + int ret; >> >> + >> >> + /* Allow calling init from multiple contexts within a single thread. >> >> This simplifies >> >> + * resource management a bit e.g in case fast-path tracepoint has >> >> already been enabled >> >> + * via command line but application doesn't care enough and performs >> >> init/fini again. 
>> >> + */ >> >> + if (rte_pmu.initialized != 0) { >> >> + rte_pmu.initialized++; >> >> + return 0; >> >> + } >> >> + >> >> + ret = scan_pmus(); >> >> + if (ret) >> >> + goto out; >> >> + >> >> + ret = pmu_arch_init(); >> >> + if (ret) >> >> + goto out; >> >> + >> >> + TAILQ_INIT(&rte_pmu.event_list); >> >> + TAILQ_INIT(&rte_pmu.event_group_list); >> >> + rte_spinlock_init(&rte_pmu.lock); >> >> + rte_pmu.initialized = 1; >> >> + >> >> + return 0; >> >> +out: >> >> + free(rte_pmu.name); >> >> + rte_pmu.name = NULL; >> >> + >> >> + return ret; >> >> +} >> >> + >> >> +void >> >> +rte_pmu_fini(void) >> >> +{ >> >> + struct rte_pmu_event_group *group, *tmp_group; >> >> + struct rte_pmu_event *event, *tmp_event; >> >> + >> >> + /* cleanup once init count drops to zero */ >> >> + if (rte_pmu.initialized == 0 || --rte_pmu.initialized != 0) >> >> + return; >> >> + >> >> + RTE_TAILQ_FOREACH_SAFE(event, &rte_pmu.event_list, next, tmp_event) { >> >> + TAILQ_REMOVE(&rte_pmu.event_list, event, next); >> >> + free_event(event); >> >> + } >> >> + >> >> + RTE_TAILQ_FOREACH_SAFE(group, &rte_pmu.event_group_list, next, >> >> tmp_group) { >> >> + TAILQ_REMOVE(&rte_pmu.event_group_list, group, next); >> >> + cleanup_events(group); >> >> + } >> >> + >> >> + pmu_arch_fini(); >> >> + free(rte_pmu.name); >> >> + rte_pmu.name = NULL; >> >> + rte_pmu.num_group_events = 0; >> >> +} >> >> diff --git a/lib/pmu/rte_pmu.h b/lib/pmu/rte_pmu.h new file mode >> >> 100644 index 0000000000..6b664c3336 >> >> --- /dev/null >> >> +++ b/lib/pmu/rte_pmu.h >> >> @@ -0,0 +1,212 @@ >> >> +/* SPDX-License-Identifier: BSD-3-Clause >> >> + * Copyright(c) 2023 Marvell >> >> + */ >> >> + >> >> +#ifndef _RTE_PMU_H_ >> >> +#define _RTE_PMU_H_ >> >> + >> >> +/** >> >> + * @file >> >> + * >> >> + * PMU event tracing operations >> >> + * >> >> + * This file defines generic API and types necessary to setup PMU >> >> +and >> >> + * read selected counters in runtime. >> >> + */ >> >> + >> >> +#ifdef __cplusplus >> >> +extern "C" { >> >> +#endif >> >> + >> >> +#include <linux/perf_event.h> >> >> + >> >> +#include <rte_atomic.h> >> >> +#include <rte_branch_prediction.h> #include <rte_common.h> >> >> +#include <rte_compat.h> #include <rte_spinlock.h> >> >> + >> >> +/** Maximum number of events in a group */ #define >> >> +MAX_NUM_GROUP_EVENTS 8 >> > >> >forgot RTE_ prefix. >> >In fact, do you really need number of events in group to be hard-coded? >> >Couldn't mmap_pages[] and fds[] be allocated dynamically by enable_group()? >> > >> >> 8 is reasonable number I think. X86/ARM have actually less that that (was >> that something like >4?). >> Moreover events are scheduled as a group so there must be enough hw >> counters available for that to succeed. So this number should cover current >> needs. > >If you think 8 will be enough to cover all possible future cases - I am ok >either way. >Still need RTE_ prefix for it. > Okay that can be added. >> >> + >> >> +/** >> >> + * A structure describing a group of events. 
>> >> + */ >> >> +struct rte_pmu_event_group { >> >> + struct perf_event_mmap_page *mmap_pages[MAX_NUM_GROUP_EVENTS]; /**< >> >> array of user pages >*/ >> >> + int fds[MAX_NUM_GROUP_EVENTS]; /**< array of event descriptors */ >> >> + bool enabled; /**< true if group was enabled on particular lcore */ >> >> + TAILQ_ENTRY(rte_pmu_event_group) next; /**< list entry */ } >> >> +__rte_cache_aligned; >> >> + >> > >> >Even if we'd decide to keep rte_pmu_read() as static inline (still >> >not sure it is a good idea), >> >> We want to save as much cpu cycles as we possibly can and inlining >> does helps in that matter. > >Ok, so asking same question from different thread: how many cycles it will >save? >What is the difference in terms of performance when you have this function >inlined vs not inlined? > On an x86 setup which is not under load (no cpusets configured, etc.), *just* not inlining rte_pmu_read() decreases performance by roughly 24% (44 vs 58 cpu cycles). At least that is what trace_perf_autotest reports. >> >why these two struct below (rte_pmu_event and rte_pmu) have to be public? >> >I think both can be safely moved away from public headers. >> > >> >> struct rte_pmu_event can be hidden I guess. >> struct rte_pmu is used in this header hence cannot be moved elsewhere. > >Not sure why? >Is that because you use it inside rte_pmu_read()? >But that check I think can be safely moved into __rte_pmu_enable_group() or >probably even into >rte_pmu_add_event(). No, we should not do that. Otherwise we'll need to call a function. Even though the check will happen early on, the function prologue/epilogue will still happen. This takes cycles. > >> > >> >> +/** >> >> + * A structure describing an event. >> >> + */ >> >> +struct rte_pmu_event { >> >> + char *name; /**< name of an event */ >> >> + unsigned int index; /**< event index into fds/mmap_pages */ >> >> + TAILQ_ENTRY(rte_pmu_event) next; /**< list entry */ }; >> > >> >> + >> >> +/** >> >> + * A PMU state container. >> >> + */ >> >> +struct rte_pmu { >> >> + char *name; /**< name of core PMU listed under >> >> /sys/bus/event_source/devices */ >> >> + rte_spinlock_t lock; /**< serialize access to event group list */ >> >> + TAILQ_HEAD(, rte_pmu_event_group) event_group_list; /**< list of event >> >> groups */ >> >> + unsigned int num_group_events; /**< number of events in a group */ >> >> + TAILQ_HEAD(, rte_pmu_event) event_list; /**< list of matching events */ >> >> + unsigned int initialized; /**< initialization counter */ }; >> >> + >> >> +/** lcore event group */ >> >> +RTE_DECLARE_PER_LCORE(struct rte_pmu_event_group, _event_group); >> >> + >> >> +/** PMU state container */ >> >> +extern struct rte_pmu rte_pmu; >> >> + >> >> +/** Each architecture supporting PMU needs to provide its own >> >> +version */ #ifndef rte_pmu_pmc_read #define >> >> +rte_pmu_pmc_read(index) ({ 0; }) #endif >> >> + >> >> +/** >> >> + * @warning >> >> + * @b EXPERIMENTAL: this API may change without prior notice >> >> + * >> >> + * Read PMU counter. >> >> + * >> >> + * @warning This should be not called directly. >> >> + * >> >> + * @param pc >> >> + * Pointer to the mmapped user page. >> >> + * @return >> >> + * Counter value read from hardware. 
>> >> + */ >> >> +static __rte_always_inline uint64_t __rte_pmu_read_userpage(struct >> >> +perf_event_mmap_page *pc) { >> >> + uint64_t width, offset; >> >> + uint32_t seq, index; >> >> + int64_t pmc; >> >> + >> >> + for (;;) { >> >> + seq = pc->lock; >> >> + rte_compiler_barrier(); >> >> + index = pc->index; >> >> + offset = pc->offset; >> >> + width = pc->pmc_width; >> >> + >> >> + /* index set to 0 means that particular counter cannot be used >> >> */ >> >> + if (likely(pc->cap_user_rdpmc && index)) { >> > >> >In mmap_events() you return EPERM if cap_user_rdpmc is not enabled. >> >Do you need another check here? Or this capability can be disabled by >> >kernel at run-time? >> > >> >> That extra check in mmap_event() may be removed actually. Some archs >> allow disabling reading rdpmc (I think that on x86 one can do that) so this >> check needs to stay. >> >> > >> >> + pmc = rte_pmu_pmc_read(index - 1); >> >> + pmc <<= 64 - width; >> >> + pmc >>= 64 - width; >> >> + offset += pmc; >> >> + } >> >> + >> >> + rte_compiler_barrier(); >> >> + >> >> + if (likely(pc->lock == seq)) >> >> + return offset; >> >> + } >> >> + >> >> + return 0; >> >> +} >> >> + >> >> +/** >> >> + * @warning >> >> + * @b EXPERIMENTAL: this API may change without prior notice >> >> + * >> >> + * Enable group of events on the calling lcore. >> >> + * >> >> + * @warning This should be not called directly. >> > >> >__rte_internal ? >> > >> >> No this cannot be internal because that will make functions calling it >> internal as well hence apps won't be able to use that. This has >> already been brought up by one of the reviewers. > >Ok, then we probably can mark it with ' @internal' tag in formal comments? > I added a warning not to call that directly. Since the function is not internal (in DPDK parlance) per se, I don't think we should add more confusion with that extra tag. >> >> >> + * >> >> + * @return >> >> + * 0 in case of success, negative value otherwise. >> >> + */ >> >> +__rte_experimental >> >> +int >> >> +__rte_pmu_enable_group(void); >> >> + >> >> +/** >> >> + * @warning >> >> + * @b EXPERIMENTAL: this API may change without prior notice >> >> + * >> >> + * Initialize PMU library. >> >> + * >> >> + * @warning This should be not called directly. >> > >> >Hmm.. then who should call it? >> >If it not supposed to be called directly, why to declare it here? >> > >> >> This is inlined and has one caller i.e rte_pmu_read(). > >I thought we are talking here about rte_pmu_init(). >I don't see where it is inlined and still not clear why it can't be called >directly. > No, __rte_pmu_enable_group() cannot be called from rte_pmu_init() because groups are configured at runtime. That is why it is called once from rte_pmu_read(). *Other* code should not call it directly. And yes, that is not inlined - my mistake. >> >> + * >> >> + * @return >> >> + * 0 in case of success, negative value otherwise. >> >> + */ >> > >> >Probably worth to mention that this function is not MT safe. >> >Same for _fini_ and add_event. >> >Also worth to mention that all control-path functions >> >(init/fini/add_event) and data-path (pmu_read) can't be called concurrently. >> > >> >> Yes they are meant to be called from main thread. > >Ok, then please add that into formal API comments. > >> >> +__rte_experimental >> >> +int >> >> +rte_pmu_init(void); >> >> +
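To make the call pattern discussed above concrete, below is a minimal usage sketch (not part of the patch). It assumes that rte_pmu_read() takes the event index returned by rte_pmu_add_event() and returns the current counter value, that EAL setup/teardown is omitted for brevity, and that "cpu_cycles" is a valid entry under /sys/bus/event_source/devices/<pmu>/events on the target machine - adjust the event name to whatever the core PMU actually exposes.

#include <inttypes.h>
#include <stdio.h>

#include <rte_pmu.h>

int
main(void)
{
	uint64_t before, after;
	int idx;

	/* control path: main thread only, not MT-safe */
	if (rte_pmu_init() < 0)
		return 1;

	/* returned index identifies the event within the per-lcore group */
	idx = rte_pmu_add_event("cpu_cycles");
	if (idx < 0) {
		rte_pmu_fini();
		return 1;
	}

	/*
	 * data path: the first call on a given lcore lazily opens, mmaps and
	 * enables the group via __rte_pmu_enable_group(); subsequent calls
	 * only read the counter from the mmapped user page
	 */
	before = rte_pmu_read(idx);
	/* ... code under measurement ... */
	after = rte_pmu_read(idx);

	printf("cpu_cycles: %" PRIu64 "\n", after - before);

	/* control path again: releases resources once init count drops to zero */
	rte_pmu_fini();

	return 0;
}

The point of the sketch is the split the thread keeps coming back to: init/add_event/fini are control-path calls confined to the main thread, while rte_pmu_read() is the only call made on the measuring lcore.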