> In order to profile app one needs to store significant amount of samples > somewhere for an analysis later on. Since trace library supports > storing data in a CTF format lets take advantage of that and add a > dedicated PMU tracepoint. > > Signed-off-by: Tomasz Duszynski <tduszyn...@marvell.com> > --- > app/test/test_trace_perf.c | 10 ++++ > doc/guides/prog_guide/profile_app.rst | 5 ++ > doc/guides/prog_guide/trace_lib.rst | 32 +++++++++++ > lib/eal/common/eal_common_trace.c | 5 +- > lib/eal/common/eal_common_trace_pmu.c | 38 ++++++++++++++ > lib/eal/common/eal_common_trace_points.c | 5 ++ > lib/eal/common/eal_trace.h | 4 ++ > lib/eal/common/meson.build | 1 + > lib/eal/include/rte_eal_trace.h | 11 ++++ > lib/eal/version.map | 1 + > lib/pmu/rte_pmu.c | 67 +++++++++++++++++++++++- > lib/pmu/rte_pmu.h | 24 +++++++-- > lib/pmu/version.map | 1 + > 13 files changed, 198 insertions(+), 6 deletions(-) > create mode 100644 lib/eal/common/eal_common_trace_pmu.c > > diff --git a/app/test/test_trace_perf.c b/app/test/test_trace_perf.c > index 8257cc02be..8a0730943e 100644 > --- a/app/test/test_trace_perf.c > +++ b/app/test/test_trace_perf.c > @@ -114,6 +114,10 @@ worker_fn_##func(void *arg) \ > #define GENERIC_DOUBLE rte_eal_trace_generic_double(3.66666) > #define GENERIC_STR rte_eal_trace_generic_str("hello world") > #define VOID_FP app_dpdk_test_fp() > +#ifdef RTE_LIB_PMU > +/* 0 corresponds first event passed via --trace= */ > +#define READ_PMU rte_eal_trace_pmu_read(0) > +#endif > > WORKER_DEFINE(GENERIC_VOID) > WORKER_DEFINE(GENERIC_U64) > @@ -122,6 +126,9 @@ WORKER_DEFINE(GENERIC_FLOAT) > WORKER_DEFINE(GENERIC_DOUBLE) > WORKER_DEFINE(GENERIC_STR) > WORKER_DEFINE(VOID_FP) > +#ifdef RTE_LIB_PMU > +WORKER_DEFINE(READ_PMU) > +#endif > > static void > run_test(const char *str, lcore_function_t f, struct test_data *data, size_t > sz) > @@ -174,6 +181,9 @@ test_trace_perf(void) > run_test("double", worker_fn_GENERIC_DOUBLE, data, sz); > run_test("string", worker_fn_GENERIC_STR, data, sz); > run_test("void_fp", worker_fn_VOID_FP, data, sz); > +#ifdef RTE_LIB_PMU > + run_test("read_pmu", worker_fn_READ_PMU, data, sz); > +#endif > > rte_free(data); > return TEST_SUCCESS; > diff --git a/doc/guides/prog_guide/profile_app.rst > b/doc/guides/prog_guide/profile_app.rst > index 854c515a61..1ab6fb9eaa 100644 > --- a/doc/guides/prog_guide/profile_app.rst > +++ b/doc/guides/prog_guide/profile_app.rst > @@ -36,6 +36,11 @@ As of now implementation imposes certain limitations: > > * Each EAL lcore measures same group of events > > +Alternatively tracing library can be used which offers dedicated tracepoint > +``rte_eal_trace_pmu_event()``. > + > +Refer to :doc:`../prog_guide/trace_lib` for more details. > + > > Profiling on x86 > ---------------- > diff --git a/doc/guides/prog_guide/trace_lib.rst > b/doc/guides/prog_guide/trace_lib.rst > index d9b17abe90..378abccd72 100644 > --- a/doc/guides/prog_guide/trace_lib.rst > +++ b/doc/guides/prog_guide/trace_lib.rst > @@ -46,6 +46,7 @@ DPDK tracing library features > trace format and is compatible with ``LTTng``. > For detailed information, refer to > `Common Trace Format <https://diamon.org/ctf/>`_. > +- Support reading PMU events on ARM64 and x86-64 (Intel) > > How to add a tracepoint? > ------------------------ > @@ -139,6 +140,37 @@ the user must use ``RTE_TRACE_POINT_FP`` instead of > ``RTE_TRACE_POINT``. > ``RTE_TRACE_POINT_FP`` is compiled out by default and it can be enabled using > the ``enable_trace_fp`` option for meson build. > > +PMU tracepoint > +-------------- > + > +Performance monitoring unit (PMU) event values can be read from hardware > +registers using predefined ``rte_pmu_read`` tracepoint. > + > +Tracing is enabled via ``--trace`` EAL option by passing both expression > +matching PMU tracepoint name i.e ``lib.eal.pmu.read`` and expression > +``e=ev1[,ev2,...]`` matching particular events:: > + > + --trace='.*pmu.read\|e=cpu_cycles,l1d_cache' > + > +Event names are available under ``/sys/bus/event_source/devices/PMU/events`` > +directory, where ``PMU`` is a placeholder for either a ``cpu`` or a directory > +containing ``cpus``. > + > +In contrary to other tracepoints this does not need any extra variables > +added to source files. Instead, caller passes index which follows the order > of > +events specified via ``--trace`` parameter. In the following example index > ``0`` > +corresponds to ``cpu_cyclces`` while index ``1`` corresponds to > ``l1d_cache``. > + > +.. code-block:: c > + > + ... > + rte_eal_trace_pmu_read(0); > + rte_eal_trace_pmu_read(1); > + ... > + > +PMU tracing support must be explicitly enabled using the ``enable_trace_fp`` > +option for meson build. > + > Event record mode > ----------------- > > diff --git a/lib/eal/common/eal_common_trace.c > b/lib/eal/common/eal_common_trace.c > index 918f49bf4f..9be8724ec4 100644 > --- a/lib/eal/common/eal_common_trace.c > +++ b/lib/eal/common/eal_common_trace.c > @@ -72,8 +72,10 @@ eal_trace_init(void) > goto free_meta; > > /* Apply global configurations */ > - STAILQ_FOREACH(arg, &trace.args, next) > + STAILQ_FOREACH(arg, &trace.args, next) { > trace_args_apply(arg->val); > + trace_pmu_args_apply(arg->val); > + } > > rte_trace_mode_set(trace.mode); > > @@ -89,6 +91,7 @@ eal_trace_init(void) > void > eal_trace_fini(void) > { > + trace_pmu_args_free(); > trace_mem_free(); > trace_metadata_destroy(); > eal_trace_args_free(); > diff --git a/lib/eal/common/eal_common_trace_pmu.c > b/lib/eal/common/eal_common_trace_pmu.c > new file mode 100644 > index 0000000000..b3ab41e8a2 > --- /dev/null > +++ b/lib/eal/common/eal_common_trace_pmu.c > @@ -0,0 +1,38 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(C) 2024 Marvell International Ltd. > + */ > + > +#include <rte_common.h> > + > +#include "eal_trace.h" > + > +#ifdef RTE_LIB_PMU > + > +#include <rte_pmu.h> > + > +void > +trace_pmu_args_apply(const char *arg) > +{ > + static bool once; > + > + if (!once) { > + if (rte_pmu_init()) > + return; > + once = true; > + } > + > + rte_pmu_add_events_by_pattern(arg); > +} > + > +void > +trace_pmu_args_free(void) > +{ > + rte_pmu_fini(); > +} > + > +#else /* !RTE_LIB_PMU */ > + > +void trace_pmu_args_apply(const char *arg __rte_unused) { return; } > +void trace_pmu_args_free(void) { return; } > + > +#endif /* RTE_LIB_PMU */ > diff --git a/lib/eal/common/eal_common_trace_points.c > b/lib/eal/common/eal_common_trace_points.c > index 0f1240ea3a..c99eab92f4 100644 > --- a/lib/eal/common/eal_common_trace_points.c > +++ b/lib/eal/common/eal_common_trace_points.c > @@ -100,3 +100,8 @@ RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_enable, > lib.eal.intr.enable) > RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_disable, > lib.eal.intr.disable) > + > +#ifdef RTE_LIB_PMU > +RTE_TRACE_POINT_REGISTER(rte_eal_trace_pmu_read, > + lib.eal.pmu.read) > +#endif > diff --git a/lib/eal/common/eal_trace.h b/lib/eal/common/eal_trace.h > index 55262677e0..58fa43472a 100644 > --- a/lib/eal/common/eal_trace.h > +++ b/lib/eal/common/eal_trace.h > @@ -104,6 +104,10 @@ int trace_epoch_time_save(void); > void trace_mem_free(void); > void trace_mem_per_thread_free(void); > > +/* PMU wrappers */ > +void trace_pmu_args_apply(const char *arg); > +void trace_pmu_args_free(void); > + > /* EAL interface */ > int eal_trace_init(void); > void eal_trace_fini(void); > diff --git a/lib/eal/common/meson.build b/lib/eal/common/meson.build > index c1bbf26654..59d5b15708 100644 > --- a/lib/eal/common/meson.build > +++ b/lib/eal/common/meson.build > @@ -27,6 +27,7 @@ sources += files( > 'eal_common_tailqs.c', > 'eal_common_thread.c', > 'eal_common_timer.c', > + 'eal_common_trace_pmu.c', > 'eal_common_trace_points.c', > 'eal_common_uuid.c', > 'malloc_elem.c', > diff --git a/lib/eal/include/rte_eal_trace.h b/lib/eal/include/rte_eal_trace.h > index 9ad2112801..9c78f63ff5 100644 > --- a/lib/eal/include/rte_eal_trace.h > +++ b/lib/eal/include/rte_eal_trace.h > @@ -127,6 +127,17 @@ RTE_TRACE_POINT( > > #define RTE_EAL_TRACE_GENERIC_FUNC rte_eal_trace_generic_func(__func__) > > +#ifdef RTE_LIB_PMU > +#include <rte_pmu.h> > + > +RTE_TRACE_POINT_FP( > + rte_eal_trace_pmu_read, > + RTE_TRACE_POINT_ARGS(unsigned int index), > + uint64_t val = rte_pmu_read(index); > + rte_trace_point_emit_u64(val); > +) > +#endif > + > #ifdef __cplusplus > } > #endif > diff --git a/lib/eal/version.map b/lib/eal/version.map > index f493cd1ca7..3dc147f848 100644 > --- a/lib/eal/version.map > +++ b/lib/eal/version.map > @@ -399,6 +399,7 @@ EXPERIMENTAL { > > # added in 24.11 > rte_bitset_to_str; > + __rte_eal_trace_pmu_read; # WINDOWS_NO_EXPORT > }; > > INTERNAL { > diff --git a/lib/pmu/rte_pmu.c b/lib/pmu/rte_pmu.c > index dd57961627..b65f08c75b 100644 > --- a/lib/pmu/rte_pmu.c > +++ b/lib/pmu/rte_pmu.c > @@ -412,12 +412,75 @@ rte_pmu_add_event(const char *name) > return event->index; > } > > +static int > +add_events(const char *pattern) > +{ > + char *token, *copy; > + int ret = 0; > + > + copy = strdup(pattern); > + if (copy == NULL) > + return -ENOMEM; > + > + token = strtok(copy, ","); > + while (token) { > + ret = rte_pmu_add_event(token); > + if (ret < 0) > + break; > + > + token = strtok(NULL, ","); > + } > + > + free(copy); > + > + return ret >= 0 ? 0 : ret; > +} > + > +int > +rte_pmu_add_events_by_pattern(const char *pattern) > +{ > + regmatch_t rmatch; > + char buf[BUFSIZ]; > + unsigned int num; > + regex_t reg; > + int ret; > + > + /* events are matched against occurrences of e=ev1[,ev2,..] pattern */ > + ret = regcomp(®, "e=([_[:alnum:]-],?)+", REG_EXTENDED); > + if (ret) { > + PMU_LOG(ERR, "Failed to compile event matching regexp"); > + return -EINVAL; > + } > + > + for (;;) { > + if (regexec(®, pattern, 1, &rmatch, 0)) > + break; > + > + num = rmatch.rm_eo - rmatch.rm_so; > + if (num > sizeof(buf)) > + num = sizeof(buf); > + > + /* skip e= pattern prefix */ > + memcpy(buf, pattern + rmatch.rm_so + 2, num - 2); > + buf[num - 2] = '\0'; > + ret = add_events(buf); > + if (ret) > + break; > + > + pattern += rmatch.rm_eo; > + } > + > + regfree(®); > + > + return ret; > +} > + > int > rte_pmu_init(void) > { > int ret; > > - if (rte_pmu.initialized) > + if (rte_pmu.initialized && ++rte_pmu.initialized)
Stupid q, why not: if (rte_pmu.initialized++ > 0) return 0? Also increment/decrement counter implies that init()/fini() can be called multiple times, correct? If so, there is still an implicit assumption that it always will happen in a sequential manner? > return 0; > > ret = scan_pmus(); > @@ -450,7 +513,7 @@ rte_pmu_fini(void) > struct rte_pmu_event_group *group; > unsigned int i; > > - if (!rte_pmu.initialized) > + if (!rte_pmu.initialized || --rte_pmu.initialized) > return; > > RTE_TAILQ_FOREACH_SAFE(event, &rte_pmu.event_list, next, tmp_event) { > diff --git a/lib/pmu/rte_pmu.h b/lib/pmu/rte_pmu.h > index 85f9127911..df571f0a2f 100644 > --- a/lib/pmu/rte_pmu.h > +++ b/lib/pmu/rte_pmu.h > @@ -135,7 +135,7 @@ __rte_pmu_enable_group(struct rte_pmu_event_group *group); > * @warning > * @b EXPERIMENTAL: this API may change without prior notice > * > - * Initialize PMU library. > + * Initialize PMU library. It's safe to call it multiple times. > * > * @return > * 0 in case of success, negative value otherwise. > @@ -148,7 +148,9 @@ rte_pmu_init(void); > * @warning > * @b EXPERIMENTAL: this API may change without prior notice > * > - * Finalize PMU library. > + * Finalize PMU library. Number of calls must match number > + * of times rte_pmu_init() was called. Otherwise memory > + * won't be freed properly. > */ > __rte_experimental > void > @@ -173,7 +175,23 @@ rte_pmu_add_event(const char *name); > * @warning > * @b EXPERIMENTAL: this API may change without prior notice > * > - * Read hardware counter configured to count occurrences of an event. > + * Add events matching pattern to the group of enabled events. > + * > + * @param pattern > + * Pattern e=ev1[,ev2,...] matching events, where evX is a placeholder for > an event listed under > + * /sys/bus/event_source/devices/<pmu>/events. > + */ > +__rte_experimental > +int > +rte_pmu_add_events_by_pattern(const char *pattern); > + > +/** > + * @warning > + * @b EXPERIMENTAL: this API may change without prior notice > + * > + * Read hardware counter configured to count occurrences of an event. This > is called by an lcore > + * binded exclusively to particular cpu and may not work as expected if gets > migrated elsewhere. > + * Reason being event group is pinned hence not supposed to be multiplexed > with any other events. > * > * @param index > * Index of an event to be read. > diff --git a/lib/pmu/version.map b/lib/pmu/version.map > index d0f907d13d..f14d498b54 100644 > --- a/lib/pmu/version.map > +++ b/lib/pmu/version.map > @@ -5,6 +5,7 @@ EXPERIMENTAL { > __rte_pmu_enable_group; > rte_pmu; > rte_pmu_add_event; > + rte_pmu_add_events_by_pattern; > rte_pmu_fini; > rte_pmu_init; > rte_pmu_read; > -- > 2.34.1