In order to profile an app one needs to store a significant amount of samples
somewhere for analysis later on. Since the trace library supports
storing data in the CTF format, let's take advantage of that and add a
dedicated PMU tracepoint.

Signed-off-by: Tomasz Duszynski <tduszyn...@marvell.com>
Acked-by: Morten Brørup <m...@smartsharesystems.com>
---
 app/test/test_trace_perf.c               | 10 ++++
 doc/guides/prog_guide/profile_app.rst    |  5 ++
 doc/guides/prog_guide/trace_lib.rst      | 32 +++++++++++++
 lib/eal/common/eal_common_trace.c        | 13 ++++-
 lib/eal/common/eal_common_trace_points.c |  5 ++
 lib/eal/include/rte_eal_trace.h          | 13 +++++
 lib/eal/meson.build                      |  3 ++
 lib/eal/version.map                      |  1 +
 lib/pmu/rte_pmu.c                        | 61 ++++++++++++++++++++++++
 lib/pmu/rte_pmu.h                        | 14 ++++++
 lib/pmu/version.map                      |  1 +
 11 files changed, 157 insertions(+), 1 deletion(-)

diff --git a/app/test/test_trace_perf.c b/app/test/test_trace_perf.c
index 46ae7d8074..f1929f2734 100644
--- a/app/test/test_trace_perf.c
+++ b/app/test/test_trace_perf.c
@@ -114,6 +114,10 @@ worker_fn_##func(void *arg) \
 #define GENERIC_DOUBLE rte_eal_trace_generic_double(3.66666)
 #define GENERIC_STR rte_eal_trace_generic_str("hello world")
 #define VOID_FP app_dpdk_test_fp()
+#ifdef RTE_EXEC_ENV_LINUX
+/* 0 corresponds to the first event passed via --trace= */
+#define READ_PMU rte_eal_trace_pmu_read(0)
+#endif
 
 WORKER_DEFINE(GENERIC_VOID)
 WORKER_DEFINE(GENERIC_U64)
@@ -122,6 +126,9 @@ WORKER_DEFINE(GENERIC_FLOAT)
 WORKER_DEFINE(GENERIC_DOUBLE)
 WORKER_DEFINE(GENERIC_STR)
 WORKER_DEFINE(VOID_FP)
+#ifdef RTE_EXEC_ENV_LINUX
+WORKER_DEFINE(READ_PMU)
+#endif
 
 static void
 run_test(const char *str, lcore_function_t f, struct test_data *data, size_t 
sz)
@@ -174,6 +181,9 @@ test_trace_perf(void)
        run_test("double", worker_fn_GENERIC_DOUBLE, data, sz);
        run_test("string", worker_fn_GENERIC_STR, data, sz);
        run_test("void_fp", worker_fn_VOID_FP, data, sz);
+#ifdef RTE_EXEC_ENV_LINUX
+       run_test("read_pmu", worker_fn_READ_PMU, data, sz);
+#endif
 
        rte_free(data);
        return TEST_SUCCESS;
diff --git a/doc/guides/prog_guide/profile_app.rst 
b/doc/guides/prog_guide/profile_app.rst
index 89e38cd301..c4dfe85c3b 100644
--- a/doc/guides/prog_guide/profile_app.rst
+++ b/doc/guides/prog_guide/profile_app.rst
@@ -19,6 +19,11 @@ dedicated tasks interrupting those tasks with perf may be 
undesirable.
 
 In such cases, an application can use the PMU library to read such events via 
``rte_pmu_read()``.
 
+Alternatively, the tracing library can be used, which offers a dedicated tracepoint
+``rte_eal_trace_pmu_read()``.
+
+Refer to :doc:`../prog_guide/trace_lib` for more details.
+
 
 Profiling on x86
 ----------------
diff --git a/doc/guides/prog_guide/trace_lib.rst 
b/doc/guides/prog_guide/trace_lib.rst
index 3e0ea5835c..9c81936e35 100644
--- a/doc/guides/prog_guide/trace_lib.rst
+++ b/doc/guides/prog_guide/trace_lib.rst
@@ -46,6 +46,7 @@ DPDK tracing library features
   trace format and is compatible with ``LTTng``.
   For detailed information, refer to
   `Common Trace Format <https://diamon.org/ctf/>`_.
+- Support reading PMU events on ARM64 and x86-64 (Intel)
 
 How to add a tracepoint?
 ------------------------
@@ -137,6 +138,37 @@ the user must use ``RTE_TRACE_POINT_FP`` instead of 
``RTE_TRACE_POINT``.
 ``RTE_TRACE_POINT_FP`` is compiled out by default and it can be enabled using
 the ``enable_trace_fp`` option for meson build.
 
+PMU tracepoint
+--------------
+
+Performance monitoring unit (PMU) event values can be read from hardware
+registers using the predefined ``rte_eal_trace_pmu_read`` tracepoint.
+
+Tracing is enabled via the ``--trace`` EAL option by passing both an expression
+matching the PMU tracepoint name, i.e. ``lib.eal.pmu.read``, and an expression
+``e=ev1[,ev2,...]`` matching particular events::
+
+    --trace='.*pmu.read\|e=cpu_cycles,l1d_cache'
+
+Event names are available under ``/sys/bus/event_source/devices/PMU/events``
+directory, where ``PMU`` is a placeholder for either a ``cpu`` or a directory
+containing ``cpus``.
+
+Contrary to other tracepoints, this one does not need any extra variables
+added to source files. Instead, the caller passes an index which follows the order of
+events specified via the ``--trace`` parameter. In the following example index ``0``
+corresponds to ``cpu_cycles`` while index ``1`` corresponds to ``l1d_cache``.
+
+.. code-block:: c
+
+ ...
+ rte_eal_trace_pmu_read(0);
+ rte_eal_trace_pmu_read(1);
+ ...
+
+PMU tracing support must be explicitly enabled using the ``enable_trace_fp``
+option for meson build.
+
 Event record mode
 -----------------
 
diff --git a/lib/eal/common/eal_common_trace.c 
b/lib/eal/common/eal_common_trace.c
index 75162b722d..8796052d0c 100644
--- a/lib/eal/common/eal_common_trace.c
+++ b/lib/eal/common/eal_common_trace.c
@@ -11,6 +11,9 @@
 #include <rte_errno.h>
 #include <rte_lcore.h>
 #include <rte_per_lcore.h>
+#ifdef RTE_EXEC_ENV_LINUX
+#include <rte_pmu.h>
+#endif
 #include <rte_string_fns.h>
 
 #include "eal_trace.h"
@@ -71,8 +74,13 @@ eal_trace_init(void)
                goto free_meta;
 
        /* Apply global configurations */
-       STAILQ_FOREACH(arg, &trace.args, next)
+       STAILQ_FOREACH(arg, &trace.args, next) {
                trace_args_apply(arg->val);
+#ifdef RTE_EXEC_ENV_LINUX
+               if (rte_pmu_init() == 0)
+                       rte_pmu_add_events_by_pattern(arg->val);
+#endif
+       }
 
        rte_trace_mode_set(trace.mode);
 
@@ -88,6 +96,9 @@ eal_trace_init(void)
 void
 eal_trace_fini(void)
 {
+#ifdef RTE_EXEC_ENV_LINUX
+       rte_pmu_fini();
+#endif
        trace_mem_free();
        trace_metadata_destroy();
        eal_trace_args_free();
diff --git a/lib/eal/common/eal_common_trace_points.c 
b/lib/eal/common/eal_common_trace_points.c
index 051f89809c..9d6faa19ed 100644
--- a/lib/eal/common/eal_common_trace_points.c
+++ b/lib/eal/common/eal_common_trace_points.c
@@ -77,3 +77,8 @@ RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_enable,
        lib.eal.intr.enable)
 RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_disable,
        lib.eal.intr.disable)
+
+#ifdef RTE_EXEC_ENV_LINUX
+/* Register the PMU read tracepoint under the name lib.eal.pmu.read (Linux builds only). */
+RTE_TRACE_POINT_REGISTER(rte_eal_trace_pmu_read,
+       lib.eal.pmu.read)
+#endif
diff --git a/lib/eal/include/rte_eal_trace.h b/lib/eal/include/rte_eal_trace.h
index 6f5c022558..c7da83c480 100644
--- a/lib/eal/include/rte_eal_trace.h
+++ b/lib/eal/include/rte_eal_trace.h
@@ -17,6 +17,9 @@ extern "C" {
 
 #include <rte_alarm.h>
 #include <rte_interrupts.h>
+#ifdef RTE_EXEC_ENV_LINUX
+#include <rte_pmu.h>
+#endif
 #include <rte_trace_point.h>
 
 #include "eal_interrupts.h"
@@ -285,6 +288,16 @@ RTE_TRACE_POINT(
        rte_trace_point_emit_string(cpuset);
 )
 
+#ifdef RTE_EXEC_ENV_LINUX
+/*
+ * Tracepoint which reads the PMU event at the given index via rte_pmu_read()
+ * and emits the value as an unsigned 64-bit field.
+ * Declared with RTE_TRACE_POINT_FP and available on Linux builds only.
+ */
+RTE_TRACE_POINT_FP(
+       rte_eal_trace_pmu_read,
+       RTE_TRACE_POINT_ARGS(unsigned int index),
+       uint64_t val;
+       val = rte_pmu_read(index);
+       rte_trace_point_emit_u64(val);
+)
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/meson.build b/lib/eal/meson.build
index 056beb9461..f5865dbcd9 100644
--- a/lib/eal/meson.build
+++ b/lib/eal/meson.build
@@ -26,6 +26,9 @@ deps += ['kvargs']
 if not is_windows
     deps += ['telemetry']
 endif
+if is_linux
+    deps += ['pmu']
+endif
 if dpdk_conf.has('RTE_USE_LIBBSD')
     ext_deps += libbsd
 endif
diff --git a/lib/eal/version.map b/lib/eal/version.map
index 2ae57ee78a..01e7a099d2 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -441,6 +441,7 @@ EXPERIMENTAL {
        rte_thread_join;
 
        # added in 23.03
+       __rte_eal_trace_pmu_read; # WINDOWS_NO_EXPORT
        rte_lcore_register_usage_cb;
        rte_thread_create_control;
        rte_thread_set_name;
diff --git a/lib/pmu/rte_pmu.c b/lib/pmu/rte_pmu.c
index 950f999cb7..862edcb1e3 100644
--- a/lib/pmu/rte_pmu.c
+++ b/lib/pmu/rte_pmu.c
@@ -398,6 +398,67 @@ rte_pmu_add_event(const char *name)
        return event->index;
 }
 
+/*
+ * Add every event named in a comma-separated list to the group of enabled events.
+ *
+ * The list is duplicated first because tokenization writes into the string.
+ * Processing stops at the first event that rte_pmu_add_event() rejects.
+ *
+ * Returns 0 when all events were added, -ENOMEM when the list could not be
+ * duplicated, or the negative value returned by rte_pmu_add_event().
+ */
+static int
+add_events(const char *pattern)
+{
+       char *token, *copy, *saveptr = NULL;
+       int ret = 0;
+
+       copy = strdup(pattern);
+       if (copy == NULL)
+               return -ENOMEM;
+
+       /* strtok_r() keeps parsing state in saveptr rather than in hidden static storage */
+       for (token = strtok_r(copy, ",", &saveptr); token != NULL;
+            token = strtok_r(NULL, ",", &saveptr)) {
+               ret = rte_pmu_add_event(token);
+               if (ret < 0)
+                       break;
+       }
+
+       free(copy);
+
+       /* a non-negative result of rte_pmu_add_event() is collapsed to plain success */
+       return ret >= 0 ? 0 : ret;
+}
+
+/*
+ * Scan pattern for every occurrence of e=ev1[,ev2,...] and add the listed events.
+ *
+ * Returns 0 when no occurrence was found or all events were added, -EINVAL when
+ * pattern is NULL or the matching expression cannot be compiled, -E2BIG when a
+ * single event list does not fit the scratch buffer, or the error reported by
+ * add_events().
+ */
+int
+rte_pmu_add_events_by_pattern(const char *pattern)
+{
+       regmatch_t rmatch;
+       char buf[BUFSIZ];
+       regex_t reg;
+       size_t len;
+       int ret;
+
+       if (pattern == NULL)
+               return -EINVAL;
+
+       /* events are matched against occurrences of e=ev1[,ev2,..] pattern */
+       ret = regcomp(&reg, "e=([_[:alnum:]-],?)+", REG_EXTENDED);
+       if (ret)
+               return -EINVAL;
+
+       while (regexec(&reg, pattern, 1, &rmatch, 0) == 0) {
+               /* length of the event list itself, i.e. the match without the e= prefix */
+               len = (size_t)(rmatch.rm_eo - rmatch.rm_so) - 2;
+
+               /* refuse to truncate: a chopped list would register a wrong event name */
+               if (len >= sizeof(buf)) {
+                       ret = -E2BIG;
+                       break;
+               }
+
+               memcpy(buf, pattern + rmatch.rm_so + 2, len);
+               buf[len] = '\0';
+               ret = add_events(buf);
+               if (ret)
+                       break;
+
+               /* continue searching right after the current match */
+               pattern += rmatch.rm_eo;
+       }
+
+       regfree(&reg);
+
+       return ret;
+}
+
 int
 rte_pmu_init(void)
 {
diff --git a/lib/pmu/rte_pmu.h b/lib/pmu/rte_pmu.h
index b1d1c17bc5..e1c3bb5e56 100644
--- a/lib/pmu/rte_pmu.h
+++ b/lib/pmu/rte_pmu.h
@@ -176,6 +176,20 @@ __rte_experimental
 int
 rte_pmu_add_event(const char *name);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Add events matching pattern to the group of enabled events.
+ *
+ * Every occurrence of e=ev1[,ev2,...] found in the pattern is processed.
+ *
+ * @param pattern
+ *   Pattern e=ev1[,ev2,...] matching events, where evX is a placeholder for an event listed under
+ *   /sys/bus/event_source/devices/pmu/events.
+ * @return
+ *   0 in case of success, negative value otherwise.
+ */
+__rte_experimental
+int
+rte_pmu_add_events_by_pattern(const char *pattern);
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice
diff --git a/lib/pmu/version.map b/lib/pmu/version.map
index 39a4f279c1..e16b3ff009 100644
--- a/lib/pmu/version.map
+++ b/lib/pmu/version.map
@@ -9,6 +9,7 @@ EXPERIMENTAL {
        per_lcore__event_group;
        rte_pmu;
        rte_pmu_add_event;
+       rte_pmu_add_events_by_pattern;
        rte_pmu_fini;
        rte_pmu_init;
        rte_pmu_read;
-- 
2.34.1

Reply via email to