Add support for programming PMU counters and reading their values at
runtime, bypassing the kernel completely.

This is especially useful in cases where CPU cores are isolated
(nohz_full), i.e. run dedicated tasks. In such cases one cannot use the
standard perf utility without sacrificing latency and performance.
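
A minimal usage sketch (event names are PMU specific; "cpu_cycles" is
an example valid on ARM64 cores):

  int index;
  uint64_t cycles;

  rte_pmu_init();
  index = rte_pmu_add_event("cpu_cycles");
  cycles = rte_pmu_read(index);
  rte_pmu_fini();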

Signed-off-by: Tomasz Duszynski <tduszyn...@marvell.com>
Acked-by: Morten Brørup <m...@smartsharesystems.com>
---
 MAINTAINERS                            |   5 +
 app/test/meson.build                   |   1 +
 app/test/test_pmu.c                    |  55 +++
 doc/api/doxy-api-index.md              |   3 +-
 doc/api/doxy-api.conf.in               |   1 +
 doc/guides/prog_guide/profile_app.rst  |   8 +
 doc/guides/rel_notes/release_23_03.rst |   9 +
 lib/meson.build                        |   1 +
 lib/pmu/meson.build                    |  13 +
 lib/pmu/pmu_private.h                  |  29 ++
 lib/pmu/rte_pmu.c                      | 464 +++++++++++++++++++++++++
 lib/pmu/rte_pmu.h                      | 205 +++++++++++
 lib/pmu/version.map                    |  20 ++
 13 files changed, 813 insertions(+), 1 deletion(-)
 create mode 100644 app/test/test_pmu.c
 create mode 100644 lib/pmu/meson.build
 create mode 100644 lib/pmu/pmu_private.h
 create mode 100644 lib/pmu/rte_pmu.c
 create mode 100644 lib/pmu/rte_pmu.h
 create mode 100644 lib/pmu/version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index 9a0f416d2e..9f13eafd95 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1697,6 +1697,11 @@ M: Nithin Dabilpuram <ndabilpu...@marvell.com>
 M: Pavan Nikhilesh <pbhagavat...@marvell.com>
 F: lib/node/
 
+PMU - EXPERIMENTAL
+M: Tomasz Duszynski <tduszyn...@marvell.com>
+F: lib/pmu/
+F: app/test/test_pmu*
+
 
 Test Applications
 -----------------
diff --git a/app/test/meson.build b/app/test/meson.build
index f34d19e3c3..7b6b69dcf1 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -111,6 +111,7 @@ test_sources = files(
         'test_reciprocal_division_perf.c',
         'test_red.c',
         'test_pie.c',
+        'test_pmu.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
diff --git a/app/test/test_pmu.c b/app/test/test_pmu.c
new file mode 100644
index 0000000000..a9bfb1a427
--- /dev/null
+++ b/app/test/test_pmu.c
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell International Ltd.
+ */
+
+#include "test.h"
+
+#ifndef RTE_EXEC_ENV_LINUX
+
+static int
+test_pmu(void)
+{
+       printf("pmu_autotest only supported on Linux, skipping test\n");
+       return TEST_SKIPPED;
+}
+
+#else
+
+#include <rte_pmu.h>
+
+static int
+test_pmu_read(void)
+{
+       const char *name = NULL;
+       int tries = 10, event;
+       uint64_t val = 0;
+
+       /* Event names are PMU specific; cover the most common architectures. */
+#if defined(RTE_ARCH_ARM64)
+       name = "cpu_cycles";
+#elif defined(RTE_ARCH_X86_64)
+       name = "cpu-cycles";
+#endif
+       if (name == NULL) {
+               printf("pmu_autotest not supported on this architecture, skipping test\n");
+               return TEST_SKIPPED;
+       }
+
+       if (rte_pmu_init() < 0)
+               return TEST_FAILED;
+
+       event = rte_pmu_add_event(name);
+       while (tries--)
+               val += rte_pmu_read(event);
+
+       rte_pmu_fini();
+
+       return val ? TEST_SUCCESS : TEST_FAILED;
+}
+
+static struct unit_test_suite pmu_tests = {
+       .suite_name = "pmu autotest",
+       .setup = NULL,
+       .teardown = NULL,
+       .unit_test_cases = {
+               TEST_CASE(test_pmu_read),
+               TEST_CASES_END()
+       }
+};
+
+static int
+test_pmu(void)
+{
+       return unit_test_suite_runner(&pmu_tests);
+}
+
+#endif /* RTE_EXEC_ENV_LINUX */
+
+REGISTER_TEST_COMMAND(pmu_autotest, test_pmu);
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index de488c7abf..7f1938f92f 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -222,7 +222,8 @@ The public API headers are grouped by topics:
   [log](@ref rte_log.h),
   [errno](@ref rte_errno.h),
   [trace](@ref rte_trace.h),
-  [trace_point](@ref rte_trace_point.h)
+  [trace_point](@ref rte_trace_point.h),
+  [pmu](@ref rte_pmu.h)
 
 - **misc**:
   [EAL config](@ref rte_eal.h),
diff --git a/doc/api/doxy-api.conf.in b/doc/api/doxy-api.conf.in
index f0886c3bd1..920e615996 100644
--- a/doc/api/doxy-api.conf.in
+++ b/doc/api/doxy-api.conf.in
@@ -63,6 +63,7 @@ INPUT                   = @TOPDIR@/doc/api/doxy-api-index.md \
                           @TOPDIR@/lib/pci \
                           @TOPDIR@/lib/pdump \
                           @TOPDIR@/lib/pipeline \
+                          @TOPDIR@/lib/pmu \
                           @TOPDIR@/lib/port \
                           @TOPDIR@/lib/power \
                           @TOPDIR@/lib/rawdev \
diff --git a/doc/guides/prog_guide/profile_app.rst b/doc/guides/prog_guide/profile_app.rst
index 14292d4c25..a8b501fe0c 100644
--- a/doc/guides/prog_guide/profile_app.rst
+++ b/doc/guides/prog_guide/profile_app.rst
@@ -7,6 +7,14 @@ Profile Your Application
 The following sections describe methods of profiling DPDK applications on
 different architectures.
 
+Performance counter based profiling
+-----------------------------------
+
+The majority of architectures support some sort of hardware performance monitoring unit
+which provides a set of programmable counters that monitor specific events. There are
+different tools which can gather that information, perf being an example here. Though in
+some scenarios, e.g. when CPU cores are isolated (nohz_full) and run dedicated tasks,
+using perf is less than ideal. In such cases one can read specific events directly from
+the application via ``rte_pmu_read()``.
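+
+A minimal sketch, assuming an event named ``cpu_cycles`` is exposed by the core PMU
+under ``/sys/bus/event_source/devices``::
+
+    #include <rte_pmu.h>
+
+    int index;
+    uint64_t cycles;
+
+    rte_pmu_init();
+    index = rte_pmu_add_event("cpu_cycles");
+    cycles = rte_pmu_read(index);
+    rte_pmu_fini();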
 
 Profiling on x86
 ----------------
diff --git a/doc/guides/rel_notes/release_23_03.rst b/doc/guides/rel_notes/release_23_03.rst
index 73f5d94e14..733541d56c 100644
--- a/doc/guides/rel_notes/release_23_03.rst
+++ b/doc/guides/rel_notes/release_23_03.rst
@@ -55,10 +55,19 @@ New Features
      Also, make sure to start the actual text at the margin.
      =======================================================
 
+* **Added PMU library.**
+
+  Added a new PMU (performance monitoring unit) library which allows applications
+  to perform self-monitoring activities without depending on external utilities like perf.
+  After integration with :doc:`../prog_guide/trace_lib`, data gathered from hardware counters
+  can be stored in CTF format for further analysis.
+
 * **Updated AMD axgbe driver.**
 
   * Added multi-process support.
 
 * **Updated Corigine nfp driver.**
 
   * Added support for meter options.
diff --git a/lib/meson.build b/lib/meson.build
index a90fee31b7..7132131b5c 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -11,6 +11,7 @@
 libraries = [
         'kvargs', # eal depends on kvargs
         'telemetry', # basic info querying
+        'pmu',
         'eal', # everything depends on eal
         'ring',
         'rcu', # rcu depends on ring
diff --git a/lib/pmu/meson.build b/lib/pmu/meson.build
new file mode 100644
index 0000000000..a4160b494e
--- /dev/null
+++ b/lib/pmu/meson.build
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(C) 2023 Marvell International Ltd.
+
+if not is_linux
+    build = false
+    reason = 'only supported on Linux'
+    subdir_done()
+endif
+
+includes = [global_inc]
+
+sources = files('rte_pmu.c')
+headers = files('rte_pmu.h')
diff --git a/lib/pmu/pmu_private.h b/lib/pmu/pmu_private.h
new file mode 100644
index 0000000000..849549b125
--- /dev/null
+++ b/lib/pmu/pmu_private.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Marvell
+ */
+
+#ifndef _PMU_PRIVATE_H_
+#define _PMU_PRIVATE_H_
+
+/**
+ * Architecture specific PMU init callback.
+ *
+ * @return
+ *   0 in case of success, negative value otherwise.
+ */
+int
+pmu_arch_init(void);
+
+/**
+ * Architecture specific PMU cleanup callback.
+ */
+void
+pmu_arch_fini(void);
+
+/**
+ * Apply architecture specific settings to config before passing it to syscall.
+ */
+void
+pmu_arch_fixup_config(uint64_t config[3]);
+
+#endif /* _PMU_PRIVATE_H_ */
diff --git a/lib/pmu/rte_pmu.c b/lib/pmu/rte_pmu.c
new file mode 100644
index 0000000000..4cf3161155
--- /dev/null
+++ b/lib/pmu/rte_pmu.c
@@ -0,0 +1,464 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell International Ltd.
+ */
+
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <regex.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include <rte_atomic.h>
+#include <rte_per_lcore.h>
+#include <rte_pmu.h>
+#include <rte_spinlock.h>
+#include <rte_tailq.h>
+
+#include "pmu_private.h"
+
+#define EVENT_SOURCE_DEVICES_PATH "/sys/bus/event_source/devices"
+
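+/*
+ * GENMASK_ULL(h, l) builds a contiguous bitmask covering bits l through h and
+ * FIELD_PREP(m, v) shifts v into the field described by mask m. Both mirror
+ * the Linux kernel helpers of the same name.
+ */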
+#ifndef GENMASK_ULL
+#define GENMASK_ULL(h, l) ((~0ULL - (1ULL << (l)) + 1) & (~0ULL >> ((64 - 1 - (h)))))
+#endif
+
+#ifndef FIELD_PREP
+#define FIELD_PREP(m, v) (((uint64_t)(v) << (__builtin_ffsll(m) - 1)) & (m))
+#endif
+
+RTE_DEFINE_PER_LCORE(struct rte_pmu_event_group, _event_group);
+struct rte_pmu rte_pmu;
+
+/*
+ * The following __rte_weak functions provide default no-ops. Architectures
+ * should override them if necessary.
+ */
+
+int
+__rte_weak pmu_arch_init(void)
+{
+       return 0;
+}
+
+void
+__rte_weak pmu_arch_fini(void)
+{
+}
+
+void
+__rte_weak pmu_arch_fixup_config(uint64_t __rte_unused config[3])
+{
+}
+
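+/*
+ * Parse a single format entry, e.g. "config:0-7", found under
+ * /sys/bus/event_source/devices/<pmu>/format/<name>. A missing high bit,
+ * as in "config1:3", denotes a single-bit field.
+ */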
+static int
+get_term_format(const char *name, int *num, uint64_t *mask)
+{
+       char *config = NULL;
+       char path[PATH_MAX];
+       int high, low, ret;
+       FILE *fp;
+
+       /* quiesce -Wmaybe-uninitialized warning */
+       *num = 0;
+       *mask = 0;
+
+       snprintf(path, sizeof(path), EVENT_SOURCE_DEVICES_PATH "/%s/format/%s", rte_pmu.name, name);
+       fp = fopen(path, "r");
+       if (fp == NULL)
+               return -errno;
+
+       errno = 0;
+       ret = fscanf(fp, "%m[^:]:%d-%d", &config, &low, &high);
+       if (ret < 2) {
+               ret = -ENODATA;
+               goto out;
+       }
+       if (errno) {
+               ret = -errno;
+               goto out;
+       }
+
+       if (ret == 2)
+               high = low;
+
+       *mask = GENMASK_ULL(high, low);
+       /* Last digit should be [012]. If last digit is missing 0 is implied. */
+       *num = config[strlen(config) - 1];
+       *num = isdigit(*num) ? *num - '0' : 0;
+
+       ret = 0;
+out:
+       free(config);
+       fclose(fp);
+
+       return ret;
+}
+
+static int
+parse_event(char *buf, uint64_t config[3])
+{
+       char *token, *term;
+       int num, ret, val;
+       uint64_t mask;
+
+       config[0] = config[1] = config[2] = 0;
+
+       token = strtok(buf, ",");
+       while (token) {
+               errno = 0;
+               /* <term>=<value> */
+               ret = sscanf(token, "%m[^=]=%i", &term, &val);
+               if (ret < 1)
+                       return -ENODATA;
+               if (errno)
+                       return -errno;
+               if (ret == 1)
+                       val = 1;
+
+               ret = get_term_format(term, &num, &mask);
+               free(term);
+               if (ret)
+                       return ret;
+
+               config[num] |= FIELD_PREP(mask, val);
+               token = strtok(NULL, ",");
+       }
+
+       return 0;
+}
+
+static int
+get_event_config(const char *name, uint64_t config[3])
+{
+       char path[PATH_MAX], buf[BUFSIZ];
+       FILE *fp;
+       int ret;
+
+       snprintf(path, sizeof(path), EVENT_SOURCE_DEVICES_PATH "/%s/events/%s", rte_pmu.name, name);
+       fp = fopen(path, "r");
+       if (fp == NULL)
+               return -errno;
+
+       ret = fread(buf, 1, sizeof(buf), fp);
+       if (ret == 0) {
+               fclose(fp);
+
+               return -EINVAL;
+       }
+       fclose(fp);
+       buf[ret] = '\0';
+
+       return parse_event(buf, config);
+}
+
+static int
+do_perf_event_open(uint64_t config[3], int group_fd)
+{
+       struct perf_event_attr attr = {
+               .size = sizeof(struct perf_event_attr),
+               .type = PERF_TYPE_RAW,
+               .exclude_kernel = 1,
+               .exclude_hv = 1,
+               .disabled = 1,
+       };
+
+       pmu_arch_fixup_config(config);
+
+       attr.config = config[0];
+       attr.config1 = config[1];
+       attr.config2 = config[2];
+
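+       /* pid = 0 and cpu = -1 attach the counter to the calling thread on
+        * whichever CPU it happens to run.
+        */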
+       return syscall(SYS_perf_event_open, &attr, 0, -1, group_fd, 0);
+}
+
+static int
+open_events(struct rte_pmu_event_group *group)
+{
+       struct rte_pmu_event *event;
+       uint64_t config[3];
+       int num = 0, ret;
+
+       /* group leader gets created first, with fd = -1 */
+       group->fds[0] = -1;
+
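+       /* Events opened with the same group_fd form a perf event group and get
+        * scheduled onto the PMU as a unit.
+        */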
+       TAILQ_FOREACH(event, &rte_pmu.event_list, next) {
+               ret = get_event_config(event->name, config);
+               if (ret)
+                       goto out;
+
+               ret = do_perf_event_open(config, group->fds[0]);
+               if (ret == -1) {
+                       ret = -errno;
+                       goto out;
+               }
+
+               group->fds[event->index] = ret;
+               num++;
+       }
+
+       return 0;
+out:
+       for (--num; num >= 0; num--) {
+               close(group->fds[num]);
+               group->fds[num] = -1;
+       }
+
+       return ret;
+}
+
+static int
+mmap_events(struct rte_pmu_event_group *group)
+{
+       long page_size = sysconf(_SC_PAGE_SIZE);
+       unsigned int i;
+       void *addr;
+       int ret;
+
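+       /* Only the first mmapped page per event is needed: it holds struct
+        * perf_event_mmap_page, which is what userspace reads counters from.
+        */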
+       for (i = 0; i < rte_pmu.num_group_events; i++) {
+               addr = mmap(0, page_size, PROT_READ, MAP_SHARED, group->fds[i], 0);
+               if (addr == MAP_FAILED) {
+                       ret = -errno;
+                       goto out;
+               }
+
+               group->mmap_pages[i] = addr;
+       }
+
+       return 0;
+out:
+       for (; i; i--) {
+               munmap(group->mmap_pages[i - 1], page_size);
+               group->mmap_pages[i - 1] = NULL;
+       }
+
+       return ret;
+}
+
+static void
+cleanup_events(struct rte_pmu_event_group *group)
+{
+       unsigned int i;
+
+       if (group->fds[0] != -1)
+               ioctl(group->fds[0], PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
+
+       for (i = 0; i < rte_pmu.num_group_events; i++) {
+               if (group->mmap_pages[i]) {
+                       munmap(group->mmap_pages[i], sysconf(_SC_PAGE_SIZE));
+                       group->mmap_pages[i] = NULL;
+               }
+
+               if (group->fds[i] != -1) {
+                       close(group->fds[i]);
+                       group->fds[i] = -1;
+               }
+       }
+
+       group->enabled = false;
+}
+
+int __rte_noinline
+rte_pmu_enable_group(void)
+{
+       struct rte_pmu_event_group *group = &RTE_PER_LCORE(_event_group);
+       int ret;
+
+       if (rte_pmu.num_group_events == 0)
+               return -ENODEV;
+
+       ret = open_events(group);
+       if (ret)
+               goto out;
+
+       ret = mmap_events(group);
+       if (ret)
+               goto out;
+
+       if (ioctl(group->fds[0], PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
+               ret = -errno;
+               goto out;
+       }
+
+       if (ioctl(group->fds[0], PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
+               ret = -errno;
+               goto out;
+       }
+
+       rte_spinlock_lock(&rte_pmu.lock);
+       TAILQ_INSERT_TAIL(&rte_pmu.event_group_list, group, next);
+       rte_spinlock_unlock(&rte_pmu.lock);
+       group->enabled = true;
+
+       return 0;
+
+out:
+       cleanup_events(group);
+
+       return ret;
+}
+
+static int
+scan_pmus(void)
+{
+       char path[PATH_MAX];
+       struct dirent *dent;
+       const char *name;
+       DIR *dirp;
+
+       dirp = opendir(EVENT_SOURCE_DEVICES_PATH);
+       if (dirp == NULL)
+               return -errno;
+
+       while ((dent = readdir(dirp))) {
+               name = dent->d_name;
+               if (name[0] == '.')
+                       continue;
+
+               /* sysfs entry should either contain cpus or be a cpu */
+               if (!strcmp(name, "cpu"))
+                       break;
+
+               snprintf(path, sizeof(path), EVENT_SOURCE_DEVICES_PATH "/%s/cpus", name);
+               if (access(path, F_OK) == 0)
+                       break;
+       }
+
+       if (dent) {
+               rte_pmu.name = strdup(name);
+               if (rte_pmu.name == NULL) {
+                       closedir(dirp);
+
+                       return -ENOMEM;
+               }
+       }
+
+       closedir(dirp);
+
+       return rte_pmu.name ? 0 : -ENODEV;
+}
+
+static struct rte_pmu_event *
+new_event(const char *name)
+{
+       struct rte_pmu_event *event;
+
+       event = calloc(1, sizeof(*event));
+       if (event == NULL)
+               goto out;
+
+       event->name = strdup(name);
+       if (event->name == NULL) {
+               free(event);
+               event = NULL;
+       }
+
+out:
+       return event;
+}
+
+static void
+free_event(struct rte_pmu_event *event)
+{
+       free(event->name);
+       free(event);
+}
+
+int
+rte_pmu_add_event(const char *name)
+{
+       struct rte_pmu_event *event;
+       char path[PATH_MAX];
+
+       if (rte_pmu.name == NULL)
+               return -ENODEV;
+
+       if (rte_pmu.num_group_events == MAX_NUM_GROUP_EVENTS)
+               return -ENOSPC;
+
+       snprintf(path, sizeof(path), EVENT_SOURCE_DEVICES_PATH "/%s/events/%s", rte_pmu.name, name);
+       if (access(path, R_OK))
+               return -ENODEV;
+
+       TAILQ_FOREACH(event, &rte_pmu.event_list, next) {
+               if (!strcmp(event->name, name))
+                       return event->index;
+       }
+
+       event = new_event(name);
+       if (event == NULL)
+               return -ENOMEM;
+
+       event->index = rte_pmu.num_group_events++;
+       TAILQ_INSERT_TAIL(&rte_pmu.event_list, event, next);
+
+       return event->index;
+}
+
+int
+rte_pmu_init(void)
+{
+       int ret;
+
+       /* Allow calling init from multiple contexts within a single thread.
+        * This simplifies resource management a bit, e.g. in case a fast-path
+        * tracepoint has already been enabled via the command line but the
+        * application performs init/fini again anyway.
+        */
+       if (rte_pmu.initialized) {
+               rte_pmu.initialized++;
+               return 0;
+       }
+
+       ret = scan_pmus();
+       if (ret)
+               goto out;
+
+       ret = pmu_arch_init();
+       if (ret)
+               goto out;
+
+       TAILQ_INIT(&rte_pmu.event_list);
+       TAILQ_INIT(&rte_pmu.event_group_list);
+       rte_spinlock_init(&rte_pmu.lock);
+       rte_pmu.initialized = 1;
+
+       return 0;
+out:
+       free(rte_pmu.name);
+       rte_pmu.name = NULL;
+
+       return ret;
+}
+
+void
+rte_pmu_fini(void)
+{
+       struct rte_pmu_event_group *group, *tmp_group;
+       struct rte_pmu_event *event, *tmp_event;
+
+       /* cleanup once init count drops to zero */
+       if (!rte_pmu.initialized || --rte_pmu.initialized)
+               return;
+
+       RTE_TAILQ_FOREACH_SAFE(event, &rte_pmu.event_list, next, tmp_event) {
+               TAILQ_REMOVE(&rte_pmu.event_list, event, next);
+               free_event(event);
+       }
+
+       RTE_TAILQ_FOREACH_SAFE(group, &rte_pmu.event_group_list, next, tmp_group) {
+               TAILQ_REMOVE(&rte_pmu.event_group_list, group, next);
+               cleanup_events(group);
+       }
+
+       pmu_arch_fini();
+       free(rte_pmu.name);
+       rte_pmu.name = NULL;
+       rte_pmu.num_group_events = 0;
+}
diff --git a/lib/pmu/rte_pmu.h b/lib/pmu/rte_pmu.h
new file mode 100644
index 0000000000..e360375a0c
--- /dev/null
+++ b/lib/pmu/rte_pmu.h
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Marvell
+ */
+
+#ifndef _RTE_PMU_H_
+#define _RTE_PMU_H_
+
+/**
+ * @file
+ *
+ * PMU event tracing operations
+ *
+ * This file defines the generic API and types necessary to set up the PMU
+ * and read selected counters at runtime.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <linux/perf_event.h>
+
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+#include <rte_compat.h>
+#include <rte_spinlock.h>
+
+/** Maximum number of events in a group */
+#define MAX_NUM_GROUP_EVENTS 8
+
+/**
+ * A structure describing a group of events.
+ */
+struct rte_pmu_event_group {
+       struct perf_event_mmap_page *mmap_pages[MAX_NUM_GROUP_EVENTS]; /**< array of user pages */
+       int fds[MAX_NUM_GROUP_EVENTS]; /**< array of event descriptors */
+       bool enabled; /**< true if group was enabled on particular lcore */
+       TAILQ_ENTRY(rte_pmu_event_group) next; /**< list entry */
+} __rte_cache_aligned;
+
+/**
+ * A structure describing an event.
+ */
+struct rte_pmu_event {
+       char *name; /**< name of an event */
+       unsigned int index; /**< event index into fds/mmap_pages */
+       TAILQ_ENTRY(rte_pmu_event) next; /**< list entry */
+};
+
+/**
+ * A PMU state container.
+ */
+struct rte_pmu {
+       char *name; /**< name of core PMU listed under /sys/bus/event_source/devices */
+       rte_spinlock_t lock; /**< serialize access to event group list */
+       TAILQ_HEAD(, rte_pmu_event_group) event_group_list; /**< list of event groups */
+       unsigned int num_group_events; /**< number of events in a group */
+       TAILQ_HEAD(, rte_pmu_event) event_list; /**< list of matching events */
+       unsigned int initialized; /**< initialization counter */
+};
+
+/** lcore event group */
+RTE_DECLARE_PER_LCORE(struct rte_pmu_event_group, _event_group);
+
+/** PMU state container */
+extern struct rte_pmu rte_pmu;
+
+/** Each architecture supporting PMU needs to provide its own version */
+#ifndef rte_pmu_pmc_read
+#define rte_pmu_pmc_read(index) ({ 0; })
+#endif
+
+/**
+ * @internal
+ *
+ * Read PMU counter.
+ *
+ * @param pc
+ *   Pointer to the mmapped user page.
+ * @return
+ *   Counter value read from hardware.
+ */
+__rte_internal
+static __rte_always_inline uint64_t
+rte_pmu_read_userpage(struct perf_event_mmap_page *pc)
+{
+       uint64_t width, offset;
+       uint32_t seq, index;
+       int64_t pmc;
+
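+       /* Retry until a consistent snapshot is obtained: the kernel bumps
+        * pc->lock around updates, so a changed sequence number between the
+        * two reads means the values were torn.
+        */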
+       for (;;) {
+               seq = pc->lock;
+               rte_compiler_barrier();
+               index = pc->index;
+               offset = pc->offset;
+               width = pc->pmc_width;
+
+               /* index set to 0 means that particular counter cannot be used */
+               if (likely(pc->cap_user_rdpmc && index)) {
+                       pmc = rte_pmu_pmc_read(index - 1);
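+                       /* sign-extend the pmc_width-bit raw value to 64 bits */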
+                       pmc <<= 64 - width;
+                       pmc >>= 64 - width;
+                       offset += pmc;
+               }
+
+               rte_compiler_barrier();
+
+               if (likely(pc->lock == seq))
+                       return offset;
+       }
+
+       return 0;
+}
+
+/**
+ * @internal
+ *
+ * Enable group of events on the calling lcore.
+ *
+ * @return
+ *   0 in case of success, negative value otherwise.
+ */
+__rte_internal
+int
+rte_pmu_enable_group(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Initialize PMU library.
+ *
+ * @return
+ *   0 in case of success, negative value otherwise.
+ */
+__rte_experimental
+int
+rte_pmu_init(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Finalize PMU library. This should be called after PMU counters are no longer being read.
+ */
+__rte_experimental
+void
+rte_pmu_fini(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Add event to the group of enabled events.
+ *
+ * @param name
+ *   Name of an event listed under /sys/bus/event_source/devices/pmu/events.
+ * @return
+ *   Event index in case of success, negative value otherwise.
+ */
+__rte_experimental
+int
+rte_pmu_add_event(const char *name);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Read hardware counter configured to count occurrences of an event.
+ *
+ * @param index
+ *   Index of an event to be read.
+ * @return
+ *   Event value read from register. In case of errors or lack of support
+ *   0 is returned. In other words, a stream of zeros in a trace file
+ *   indicates a problem with reading a particular PMU event register.
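+ *
+ * @note The first call on a given lcore lazily enables the event group on
+ * that lcore and is therefore slower than subsequent reads.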
+ */
+__rte_experimental
+static __rte_always_inline uint64_t
+rte_pmu_read(unsigned int index)
+{
+       struct rte_pmu_event_group *group = &RTE_PER_LCORE(_event_group);
+       int ret;
+
+       if (unlikely(!rte_pmu.initialized))
+               return 0;
+
+       if (unlikely(!group->enabled)) {
+               ret = rte_pmu_enable_group();
+               if (ret)
+                       return 0;
+       }
+
+       if (unlikely(index >= rte_pmu.num_group_events))
+               return 0;
+
+       return rte_pmu_read_userpage(group->mmap_pages[index]);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PMU_H_ */
diff --git a/lib/pmu/version.map b/lib/pmu/version.map
new file mode 100644
index 0000000000..50fb0f354e
--- /dev/null
+++ b/lib/pmu/version.map
@@ -0,0 +1,20 @@
+DPDK_23 {
+       local: *;
+};
+
+EXPERIMENTAL {
+       global:
+
+       per_lcore__event_group;
+       rte_pmu;
+       rte_pmu_add_event;
+       rte_pmu_fini;
+       rte_pmu_init;
+       rte_pmu_read;
+};
+
+INTERNAL {
+       global:
+
+       rte_pmu_enable_group;
+};
-- 
2.34.1
