On Sat, Sep 28, 2024 at 3:37 AM Tomasz Duszynski <tduszyn...@marvell.com> wrote:
>
> Add support for programming PMU counters and reading their values
> in runtime bypassing kernel completely.
>
> This is especially useful in cases where CPU cores are isolated
> i.e run dedicated tasks. In such cases one cannot use standard
> perf utility without sacrificing latency and performance.
>
> Signed-off-by: Tomasz Duszynski <tduszyn...@marvell.com>
> ---
>  MAINTAINERS                            |   5 +
>  app/test/meson.build                   |   1 +
>  app/test/test_pmu.c                    |  62 ++++
>  doc/api/doxy-api-index.md              |   3 +-
>  doc/api/doxy-api.conf.in               |   1 +
>  doc/guides/prog_guide/profile_app.rst  |  26 ++
>  doc/guides/rel_notes/release_24_11.rst |   7 +
>  lib/meson.build                        |   1 +
>  lib/pmu/meson.build                    |  15 +
>  lib/pmu/pmu_private.h                  |  32 ++
>  lib/pmu/rte_pmu.c                      | 465 +++++++++++++++++++++++++
>  lib/pmu/rte_pmu.h                      | 206 +++++++++++
>  lib/pmu/version.map                    |  14 +
>  13 files changed, 837 insertions(+), 1 deletion(-)
>  create mode 100644 app/test/test_pmu.c
>  create mode 100644 lib/pmu/meson.build
>  create mode 100644 lib/pmu/pmu_private.h
>  create mode 100644 lib/pmu/rte_pmu.c
>  create mode 100644 lib/pmu/rte_pmu.h
>  create mode 100644 lib/pmu/version.map
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index c5a703b5c0..80bf5968de 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1811,6 +1811,11 @@ M: Nithin Dabilpuram <ndabilpu...@marvell.com>
>  M: Pavan Nikhilesh <pbhagavat...@marvell.com>
>  F: lib/node/
>
> +PMU - EXPERIMENTAL
> +M: Tomasz Duszynski <tduszyn...@marvell.com>
> +F: lib/pmu/
> +F: app/test/test_pmu*
> +
>
>  Test Applications
>  -----------------
> diff --git a/app/test/meson.build b/app/test/meson.build
> index e29258e6ec..45f56d8aae 100644
> --- a/app/test/meson.build
> +++ b/app/test/meson.build
> @@ -139,6 +139,7 @@ source_file_deps = {
>      'test_pmd_perf.c': ['ethdev', 'net'] + packet_burst_generator_deps,
>      'test_pmd_ring.c': ['net_ring', 'ethdev', 'bus_vdev'],
>      'test_pmd_ring_perf.c': ['ethdev', 'net_ring', 'bus_vdev'],
> +    'test_pmu.c': ['pmu'],
>      'test_power.c': ['power'],
>      'test_power_cpufreq.c': ['power'],
>      'test_power_intel_uncore.c': ['power'],
> diff --git a/app/test/test_pmu.c b/app/test/test_pmu.c
> new file mode 100644
> index 0000000000..5792cf1963
> --- /dev/null
> +++ b/app/test/test_pmu.c
> @@ -0,0 +1,62 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2023 Marvell International Ltd.

Please update the copyright year 2023 -> 2024 across the patches.


> + */
> +
> +#include "test.h"
> +
> +#ifndef RTE_EXEC_ENV_LINUX

There is a lot of conditional compilation across the series.
I think we can remove all of it by returning -ENOTSUP from
rte_pmu_init() and friends on non-Linux operating systems and
adjusting the documentation, so the caller is aware of the contract.
Alternatively, add rte_pmd_feature_is_enabled() or similar.



>
>  - **misc**:
>    [EAL config](@ref rte_eal.h),
> diff --git a/doc/api/doxy-api.conf.in b/doc/api/doxy-api.conf.in
> index a8823c046f..658490b6a2 100644
> --- a/doc/api/doxy-api.conf.in
> +++ b/doc/api/doxy-api.conf.in
> @@ -69,6 +69,7 @@ INPUT                   = 
> @TOPDIR@/doc/api/doxy-api-index.md \
>                            @TOPDIR@/lib/pdcp \
>                            @TOPDIR@/lib/pdump \
>                            @TOPDIR@/lib/pipeline \
> +                          @TOPDIR@/lib/pmu \
>                            @TOPDIR@/lib/port \
>                            @TOPDIR@/lib/power \
>                            @TOPDIR@/lib/ptr_compress \
> diff --git a/doc/guides/prog_guide/profile_app.rst 
> b/doc/guides/prog_guide/profile_app.rst
> index a6b5fb4d5e..ecb90a0d94 100644
> --- a/doc/guides/prog_guide/profile_app.rst
> +++ b/doc/guides/prog_guide/profile_app.rst
> @@ -7,6 +7,32 @@ Profile Your Application
>  The following sections describe methods of profiling DPDK applications on
>  different architectures.
>
> +Performance counter based profiling
> +-----------------------------------
> +
> +Majority of architectures support some performance monitoring unit (PMU).
> +Such unit provides programmable counters that monitor specific events.
> +
> +Different tools gather that information, like for example perf.
> +However, in some scenarios when CPU cores are isolated and run
> +dedicated tasks interrupting those tasks with perf may be undesirable.
> +
> +In such cases, an application can use the PMU library to read such events 
> via ``rte_pmu_read()``.
> +
> +By default, userspace applications are not allowed to access PMU internals. 
> That can be changed
> +by setting ``/sys/kernel/perf_event_paranoid`` to 2 (that should be a 
> default value anyway) and
> +adding ``CAP_PERFMON`` capability to DPDK application. Please refer to
> +``Documentation/admin-guide/perf-security.rst`` under Linux sources for more 
> information. Fairly
> +recent kernel, i.e >= 5.9, is advised too.
> +
> +As of now implementation imposes certain limitations:
> +
> +* Only EAL lcores are supported
> +
> +* EAL lcores must not share a cpu
> +
> +* Each EAL lcore measures same group of events
> +
>

> new file mode 100644
> index 0000000000..abf32da967
> --- /dev/null
> +++ b/lib/pmu/rte_pmu.h
> @@ -0,0 +1,206 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2024 Marvell
> + */
> +
> +#ifndef _RTE_PMU_H_
> +#define _RTE_PMU_H_
> +
> +/**
> + * @file
> + *
> + * PMU event tracing operations
> + *
> + * This file defines generic API and types necessary to setup PMU and
> + * read selected counters in runtime.
> + */
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <linux/perf_event.h>
> +
> +#include <rte_atomic.h>
> +#include <rte_branch_prediction.h>
> +#include <rte_common.h>
> +#include <rte_compat.h>
> +#include <rte_spinlock.h>
> +
> +/** Maximum number of events in a group */
> +#define RTE_MAX_NUM_GROUP_EVENTS 8
> +
> +/**
> + * A structure describing a group of events.
> + */
> +struct __rte_cache_aligned rte_pmu_event_group {
> +       /**< array of user pages */

Remove the "<", since this comment comes before the symbol it documents.
Please check the generated documentation to verify all the doc comments.

> +       struct perf_event_mmap_page *mmap_pages[RTE_MAX_NUM_GROUP_EVENTS];
> +       int fds[RTE_MAX_NUM_GROUP_EVENTS]; /**< array of event descriptors */
> +       bool enabled; /**< true if group was enabled on particular lcore */
> +       TAILQ_ENTRY(rte_pmu_event_group) next; /**< list entry */
> +};
> +

Reply via email to