In order to profile app one needs to store significant amount of samples somewhere for an analysis later on. Since trace library supports storing data in a CTF format lets take advantage of that and add a dedicated PMU tracepoint.
Signed-off-by: Tomasz Duszynski <tduszyn...@marvell.com> --- app/test/test_trace_perf.c | 10 ++++ doc/guides/prog_guide/profile_app.rst | 5 ++ doc/guides/prog_guide/trace_lib.rst | 32 +++++++++++++ lib/eal/common/eal_common_trace.c | 13 ++++- lib/eal/common/eal_common_trace_points.c | 5 ++ lib/eal/include/rte_eal_trace.h | 13 +++++ lib/eal/meson.build | 3 ++ lib/eal/version.map | 3 ++ lib/pmu/rte_pmu.c | 61 ++++++++++++++++++++++++ lib/pmu/rte_pmu.h | 14 ++++++ lib/pmu/version.map | 1 + 11 files changed, 159 insertions(+), 1 deletion(-) diff --git a/app/test/test_trace_perf.c b/app/test/test_trace_perf.c index 8257cc02be..f1f2e74136 100644 --- a/app/test/test_trace_perf.c +++ b/app/test/test_trace_perf.c @@ -114,6 +114,10 @@ worker_fn_##func(void *arg) \ #define GENERIC_DOUBLE rte_eal_trace_generic_double(3.66666) #define GENERIC_STR rte_eal_trace_generic_str("hello world") #define VOID_FP app_dpdk_test_fp() +#ifdef RTE_EXEC_ENV_LINUX +/* 0 corresponds first event passed via --trace= */ +#define READ_PMU rte_eal_trace_pmu_read(0) +#endif WORKER_DEFINE(GENERIC_VOID) WORKER_DEFINE(GENERIC_U64) @@ -122,6 +126,9 @@ WORKER_DEFINE(GENERIC_FLOAT) WORKER_DEFINE(GENERIC_DOUBLE) WORKER_DEFINE(GENERIC_STR) WORKER_DEFINE(VOID_FP) +#ifdef RTE_EXEC_ENV_LINUX +WORKER_DEFINE(READ_PMU) +#endif static void run_test(const char *str, lcore_function_t f, struct test_data *data, size_t sz) @@ -174,6 +181,9 @@ test_trace_perf(void) run_test("double", worker_fn_GENERIC_DOUBLE, data, sz); run_test("string", worker_fn_GENERIC_STR, data, sz); run_test("void_fp", worker_fn_VOID_FP, data, sz); +#ifdef RTE_EXEC_ENV_LINUX + run_test("read_pmu", worker_fn_READ_PMU, data, sz); +#endif rte_free(data); return TEST_SUCCESS; diff --git a/doc/guides/prog_guide/profile_app.rst b/doc/guides/prog_guide/profile_app.rst index ecb90a0d94..d73d919521 100644 --- a/doc/guides/prog_guide/profile_app.rst +++ b/doc/guides/prog_guide/profile_app.rst @@ -33,6 +33,11 @@ As of now implementation imposes certain limitations: * Each EAL lcore measures same group of events +Alternatively tracing library can be used which offers dedicated tracepoint +``rte_eal_trace_pmu_event()``. + +Refer to :doc:`../prog_guide/trace_lib` for more details. + Profiling on x86 ---------------- diff --git a/doc/guides/prog_guide/trace_lib.rst b/doc/guides/prog_guide/trace_lib.rst index d9b17abe90..378abccd72 100644 --- a/doc/guides/prog_guide/trace_lib.rst +++ b/doc/guides/prog_guide/trace_lib.rst @@ -46,6 +46,7 @@ DPDK tracing library features trace format and is compatible with ``LTTng``. For detailed information, refer to `Common Trace Format <https://diamon.org/ctf/>`_. +- Support reading PMU events on ARM64 and x86-64 (Intel) How to add a tracepoint? ------------------------ @@ -139,6 +140,37 @@ the user must use ``RTE_TRACE_POINT_FP`` instead of ``RTE_TRACE_POINT``. ``RTE_TRACE_POINT_FP`` is compiled out by default and it can be enabled using the ``enable_trace_fp`` option for meson build. +PMU tracepoint +-------------- + +Performance monitoring unit (PMU) event values can be read from hardware +registers using predefined ``rte_pmu_read`` tracepoint. + +Tracing is enabled via ``--trace`` EAL option by passing both expression +matching PMU tracepoint name i.e ``lib.eal.pmu.read`` and expression +``e=ev1[,ev2,...]`` matching particular events:: + + --trace='.*pmu.read\|e=cpu_cycles,l1d_cache' + +Event names are available under ``/sys/bus/event_source/devices/PMU/events`` +directory, where ``PMU`` is a placeholder for either a ``cpu`` or a directory +containing ``cpus``. + +In contrary to other tracepoints this does not need any extra variables +added to source files. Instead, caller passes index which follows the order of +events specified via ``--trace`` parameter. In the following example index ``0`` +corresponds to ``cpu_cyclces`` while index ``1`` corresponds to ``l1d_cache``. + +.. code-block:: c + + ... + rte_eal_trace_pmu_read(0); + rte_eal_trace_pmu_read(1); + ... + +PMU tracing support must be explicitly enabled using the ``enable_trace_fp`` +option for meson build. + Event record mode ----------------- diff --git a/lib/eal/common/eal_common_trace.c b/lib/eal/common/eal_common_trace.c index 918f49bf4f..5373dbb717 100644 --- a/lib/eal/common/eal_common_trace.c +++ b/lib/eal/common/eal_common_trace.c @@ -12,6 +12,9 @@ #include <rte_errno.h> #include <rte_lcore.h> #include <rte_per_lcore.h> +#ifdef RTE_EXEC_ENV_LINUX +#include <rte_pmu.h> +#endif #include <rte_string_fns.h> #include "eal_trace.h" @@ -72,8 +75,13 @@ eal_trace_init(void) goto free_meta; /* Apply global configurations */ - STAILQ_FOREACH(arg, &trace.args, next) + STAILQ_FOREACH(arg, &trace.args, next) { trace_args_apply(arg->val); +#ifdef RTE_EXEC_ENV_LINUX + if (rte_pmu_init() == 0) + rte_pmu_add_events_by_pattern(arg->val); +#endif + } rte_trace_mode_set(trace.mode); @@ -89,6 +97,9 @@ eal_trace_init(void) void eal_trace_fini(void) { +#ifdef RTE_EXEC_ENV_LINUX + rte_pmu_fini(); +#endif trace_mem_free(); trace_metadata_destroy(); eal_trace_args_free(); diff --git a/lib/eal/common/eal_common_trace_points.c b/lib/eal/common/eal_common_trace_points.c index 0f1240ea3a..05f7ed83d5 100644 --- a/lib/eal/common/eal_common_trace_points.c +++ b/lib/eal/common/eal_common_trace_points.c @@ -100,3 +100,8 @@ RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_enable, lib.eal.intr.enable) RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_disable, lib.eal.intr.disable) + +#ifdef RTE_EXEC_ENV_LINUX +RTE_TRACE_POINT_REGISTER(rte_eal_trace_pmu_read, + lib.eal.pmu.read) +#endif diff --git a/lib/eal/include/rte_eal_trace.h b/lib/eal/include/rte_eal_trace.h index c3d15bbe5e..b7e7f5b085 100644 --- a/lib/eal/include/rte_eal_trace.h +++ b/lib/eal/include/rte_eal_trace.h @@ -15,6 +15,9 @@ extern "C" { #endif +#ifdef RTE_EXEC_ENV_LINUX +#include <rte_pmu.h> +#endif #include <rte_trace_point.h> /* Generic */ @@ -127,6 +130,16 @@ RTE_TRACE_POINT( #define RTE_EAL_TRACE_GENERIC_FUNC rte_eal_trace_generic_func(__func__) +#ifdef RTE_EXEC_ENV_LINUX +RTE_TRACE_POINT_FP( + rte_eal_trace_pmu_read, + RTE_TRACE_POINT_ARGS(unsigned int index), + uint64_t val; + val = rte_pmu_read(index); + rte_trace_point_emit_u64(val); +) +#endif + #ifdef __cplusplus } #endif diff --git a/lib/eal/meson.build b/lib/eal/meson.build index e1d6c4cf17..802b66afee 100644 --- a/lib/eal/meson.build +++ b/lib/eal/meson.build @@ -18,6 +18,9 @@ deps += ['log', 'kvargs'] if not is_windows deps += ['telemetry'] endif +if is_linux + deps += ['pmu'] +endif if dpdk_conf.has('RTE_USE_LIBBSD') ext_deps += libbsd endif diff --git a/lib/eal/version.map b/lib/eal/version.map index e3ff412683..17c56f6bf7 100644 --- a/lib/eal/version.map +++ b/lib/eal/version.map @@ -396,6 +396,9 @@ EXPERIMENTAL { # added in 24.03 rte_vfio_get_device_info; # WINDOWS_NO_EXPORT + + # added in 24.11 + __rte_eal_trace_pmu_read; # WINDOWS_NO_EXPORT }; INTERNAL { diff --git a/lib/pmu/rte_pmu.c b/lib/pmu/rte_pmu.c index ddccbfb7c6..70f6244980 100644 --- a/lib/pmu/rte_pmu.c +++ b/lib/pmu/rte_pmu.c @@ -403,6 +403,67 @@ rte_pmu_add_event(const char *name) return event->index; } +static int +add_events(const char *pattern) +{ + char *token, *copy; + int ret = 0; + + copy = strdup(pattern); + if (copy == NULL) + return -ENOMEM; + + token = strtok(copy, ","); + while (token) { + ret = rte_pmu_add_event(token); + if (ret < 0) + break; + + token = strtok(NULL, ","); + } + + free(copy); + + return ret >= 0 ? 0 : ret; +} + +int +rte_pmu_add_events_by_pattern(const char *pattern) +{ + regmatch_t rmatch; + char buf[BUFSIZ]; + unsigned int num; + regex_t reg; + int ret; + + /* events are matched against occurrences of e=ev1[,ev2,..] pattern */ + ret = regcomp(®, "e=([_[:alnum:]-],?)+", REG_EXTENDED); + if (ret) + return -EINVAL; + + for (;;) { + if (regexec(®, pattern, 1, &rmatch, 0)) + break; + + num = rmatch.rm_eo - rmatch.rm_so; + if (num > sizeof(buf)) + num = sizeof(buf); + + /* skip e= pattern prefix */ + memcpy(buf, pattern + rmatch.rm_so + 2, num - 2); + buf[num - 2] = '\0'; + ret = add_events(buf); + if (ret) + break; + + pattern += rmatch.rm_eo; + } + + regfree(®); + + return ret; +} + int rte_pmu_init(void) { diff --git a/lib/pmu/rte_pmu.h b/lib/pmu/rte_pmu.h index a40f69fb5d..78fa6bba19 100644 --- a/lib/pmu/rte_pmu.h +++ b/lib/pmu/rte_pmu.h @@ -170,6 +170,20 @@ __rte_experimental int rte_pmu_add_event(const char *name); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Add events matching pattern to the group of enabled events. + * + * @param pattern + * Pattern e=ev1[,ev2,...] matching events, where evX is a placeholder for an event listed under + * /sys/bus/event_source/devices/pmu/events. + */ +__rte_experimental +int +rte_pmu_add_events_by_pattern(const char *pattern); + /** * @warning * @b EXPERIMENTAL: this API may change without prior notice diff --git a/lib/pmu/version.map b/lib/pmu/version.map index d7c80ce4ce..0c98209b4a 100644 --- a/lib/pmu/version.map +++ b/lib/pmu/version.map @@ -6,6 +6,7 @@ EXPERIMENTAL { per_lcore__event_group; rte_pmu; rte_pmu_add_event; + rte_pmu_add_events_by_pattern; rte_pmu_fini; rte_pmu_init; rte_pmu_read; -- 2.34.1