>> In order to profile app one needs to store significant amount of
>> samples somewhere for an analysis later on. Since trace library
>> supports storing data in a CTF format lets take advantage of that and
>> add a dedicated PMU tracepoint.
>>
>> Signed-off-by: Tomasz Duszynski <tduszyn...@marvell.com>
>> ---
>>  app/test/test_trace_perf.c               | 10 ++++
>>  doc/guides/prog_guide/profile_app.rst    |  5 ++
>>  doc/guides/prog_guide/trace_lib.rst      | 32 +++++++++++
>>  lib/eal/common/eal_common_trace.c        |  5 +-
>>  lib/eal/common/eal_common_trace_pmu.c    | 38 ++++++++++++++
>>  lib/eal/common/eal_common_trace_points.c |  5 ++
>>  lib/eal/common/eal_trace.h               |  4 ++
>>  lib/eal/common/meson.build               |  1 +
>>  lib/eal/include/rte_eal_trace.h          | 11 ++++
>>  lib/eal/version.map                      |  1 +
>>  lib/pmu/rte_pmu.c                        | 67 +++++++++++++++++++++++-
>>  lib/pmu/rte_pmu.h                        | 24 +++++++--
>>  lib/pmu/version.map                      |  1 +
>>  13 files changed, 198 insertions(+), 6 deletions(-)  create mode
>> 100644 lib/eal/common/eal_common_trace_pmu.c
>>
>> diff --git a/app/test/test_trace_perf.c b/app/test/test_trace_perf.c
>> index 8257cc02be..8a0730943e 100644
>> --- a/app/test/test_trace_perf.c
>> +++ b/app/test/test_trace_perf.c
>> @@ -114,6 +114,10 @@ worker_fn_##func(void *arg) \  #define
>> GENERIC_DOUBLE rte_eal_trace_generic_double(3.66666)
>>  #define GENERIC_STR rte_eal_trace_generic_str("hello world")  #define
>> VOID_FP app_dpdk_test_fp()
>> +#ifdef RTE_LIB_PMU
>> +/* 0 corresponds first event passed via --trace= */ #define READ_PMU
>> +rte_eal_trace_pmu_read(0) #endif
>>
>>  WORKER_DEFINE(GENERIC_VOID)
>>  WORKER_DEFINE(GENERIC_U64)
>> @@ -122,6 +126,9 @@ WORKER_DEFINE(GENERIC_FLOAT)
>>  WORKER_DEFINE(GENERIC_DOUBLE)
>>  WORKER_DEFINE(GENERIC_STR)
>>  WORKER_DEFINE(VOID_FP)
>> +#ifdef RTE_LIB_PMU
>> +WORKER_DEFINE(READ_PMU)
>> +#endif
>>
>>  static void
>>  run_test(const char *str, lcore_function_t f, struct test_data *data,
>> size_t sz) @@ -174,6 +181,9 @@ test_trace_perf(void)
>>      run_test("double", worker_fn_GENERIC_DOUBLE, data, sz);
>>      run_test("string", worker_fn_GENERIC_STR, data, sz);
>>      run_test("void_fp", worker_fn_VOID_FP, data, sz);
>> +#ifdef RTE_LIB_PMU
>> +    run_test("read_pmu", worker_fn_READ_PMU, data, sz); #endif
>>
>>      rte_free(data);
>>      return TEST_SUCCESS;
>> diff --git a/doc/guides/prog_guide/profile_app.rst
>> b/doc/guides/prog_guide/profile_app.rst
>> index 854c515a61..1ab6fb9eaa 100644
>> --- a/doc/guides/prog_guide/profile_app.rst
>> +++ b/doc/guides/prog_guide/profile_app.rst
>> @@ -36,6 +36,11 @@ As of now implementation imposes certain limitations:
>>
>>  * Each EAL lcore measures same group of events
>>
>> +Alternatively tracing library can be used which offers dedicated
>> +tracepoint ``rte_eal_trace_pmu_event()``.
>> +
>> +Refer to :doc:`../prog_guide/trace_lib` for more details.
>> +
>>
>>  Profiling on x86
>>  ----------------
>> diff --git a/doc/guides/prog_guide/trace_lib.rst
>> b/doc/guides/prog_guide/trace_lib.rst
>> index d9b17abe90..378abccd72 100644
>> --- a/doc/guides/prog_guide/trace_lib.rst
>> +++ b/doc/guides/prog_guide/trace_lib.rst
>> @@ -46,6 +46,7 @@ DPDK tracing library features
>>    trace format and is compatible with ``LTTng``.
>>    For detailed information, refer to
>>    `Common Trace Format <https://urldefense.proofpoint.com/v2/url?u=https-
>3A__diamon.org_ctf_&d=DwIFAg&c=nKjWec2b6R0mOyPaz7xtfQ&r=PZNXgrbjdlXxVEEGYkxIxRndyEUwWU_ad5ce22YI6Is
>&m=sUalf-TsyEi-
>hdbFm4eACDHmnQ9BrqmS_1Df11kPLuDK3_xxQZoVjxp2ZFNsHPnb&s=JntIuAUv6IrRBlS92bo8iPsQ66IVHHPouJ4S82GCNyI&
>e=>`_.
>> +- Support reading PMU events on ARM64 and x86-64 (Intel)
>>
>>  How to add a tracepoint?
>>  ------------------------
>> @@ -139,6 +140,37 @@ the user must use ``RTE_TRACE_POINT_FP`` instead of 
>> ``RTE_TRACE_POINT``.
>>  ``RTE_TRACE_POINT_FP`` is compiled out by default and it can be
>> enabled using  the ``enable_trace_fp`` option for meson build.
>>
>> +PMU tracepoint
>> +--------------
>> +
>> +Performance monitoring unit (PMU) event values can be read from
>> +hardware registers using predefined ``rte_pmu_read`` tracepoint.
>> +
>> +Tracing is enabled via ``--trace`` EAL option by passing both
>> +expression matching PMU tracepoint name i.e ``lib.eal.pmu.read`` and
>> +expression ``e=ev1[,ev2,...]`` matching particular events::
>> +
>> +    --trace='.*pmu.read\|e=cpu_cycles,l1d_cache'
>> +
>> +Event names are available under
>> +``/sys/bus/event_source/devices/PMU/events``
>> +directory, where ``PMU`` is a placeholder for either a ``cpu`` or a
>> +directory containing ``cpus``.
>> +
>> +In contrary to other tracepoints this does not need any extra
>> +variables added to source files. Instead, caller passes index which
>> +follows the order of events specified via ``--trace`` parameter. In
>> +the following example index ``0`` corresponds to ``cpu_cyclces`` while 
>> index ``1`` corresponds
>to ``l1d_cache``.
>> +
>> +.. code-block:: c
>> +
>> + ...
>> + rte_eal_trace_pmu_read(0);
>> + rte_eal_trace_pmu_read(1);
>> + ...
>> +
>> +PMU tracing support must be explicitly enabled using the
>> +``enable_trace_fp`` option for meson build.
>> +
>>  Event record mode
>>  -----------------
>>
>> diff --git a/lib/eal/common/eal_common_trace.c
>> b/lib/eal/common/eal_common_trace.c
>> index 918f49bf4f..9be8724ec4 100644
>> --- a/lib/eal/common/eal_common_trace.c
>> +++ b/lib/eal/common/eal_common_trace.c
>> @@ -72,8 +72,10 @@ eal_trace_init(void)
>>              goto free_meta;
>>
>>      /* Apply global configurations */
>> -    STAILQ_FOREACH(arg, &trace.args, next)
>> +    STAILQ_FOREACH(arg, &trace.args, next) {
>>              trace_args_apply(arg->val);
>> +            trace_pmu_args_apply(arg->val);
>> +    }
>>
>>      rte_trace_mode_set(trace.mode);
>>
>> @@ -89,6 +91,7 @@ eal_trace_init(void)  void
>>  eal_trace_fini(void)
>>  {
>> +    trace_pmu_args_free();
>>      trace_mem_free();
>>      trace_metadata_destroy();
>>      eal_trace_args_free();
>> diff --git a/lib/eal/common/eal_common_trace_pmu.c
>> b/lib/eal/common/eal_common_trace_pmu.c
>> new file mode 100644
>> index 0000000000..b3ab41e8a2
>> --- /dev/null
>> +++ b/lib/eal/common/eal_common_trace_pmu.c
>> @@ -0,0 +1,38 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(C) 2024 Marvell International Ltd.
>> + */
>> +
>> +#include <rte_common.h>
>> +
>> +#include "eal_trace.h"
>> +
>> +#ifdef RTE_LIB_PMU
>> +
>> +#include <rte_pmu.h>
>> +
>> +void
>> +trace_pmu_args_apply(const char *arg) {
>> +    static bool once;
>> +
>> +    if (!once) {
>> +            if (rte_pmu_init())
>> +                    return;
>> +            once = true;
>> +    }
>> +
>> +    rte_pmu_add_events_by_pattern(arg);
>> +}
>> +
>> +void
>> +trace_pmu_args_free(void)
>> +{
>> +    rte_pmu_fini();
>> +}
>> +
>> +#else /* !RTE_LIB_PMU */
>> +
>> +void trace_pmu_args_apply(const char *arg __rte_unused) { return; }
>> +void trace_pmu_args_free(void) { return; }
>> +
>> +#endif /* RTE_LIB_PMU */
>> diff --git a/lib/eal/common/eal_common_trace_points.c
>> b/lib/eal/common/eal_common_trace_points.c
>> index 0f1240ea3a..c99eab92f4 100644
>> --- a/lib/eal/common/eal_common_trace_points.c
>> +++ b/lib/eal/common/eal_common_trace_points.c
>> @@ -100,3 +100,8 @@ RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_enable,
>>      lib.eal.intr.enable)
>>  RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_disable,
>>      lib.eal.intr.disable)
>> +
>> +#ifdef RTE_LIB_PMU
>> +RTE_TRACE_POINT_REGISTER(rte_eal_trace_pmu_read,
>> +    lib.eal.pmu.read)
>> +#endif
>> diff --git a/lib/eal/common/eal_trace.h b/lib/eal/common/eal_trace.h
>> index 55262677e0..58fa43472a 100644
>> --- a/lib/eal/common/eal_trace.h
>> +++ b/lib/eal/common/eal_trace.h
>> @@ -104,6 +104,10 @@ int trace_epoch_time_save(void);  void
>> trace_mem_free(void);  void trace_mem_per_thread_free(void);
>>
>> +/* PMU wrappers */
>> +void trace_pmu_args_apply(const char *arg); void
>> +trace_pmu_args_free(void);
>> +
>>  /* EAL interface */
>>  int eal_trace_init(void);
>>  void eal_trace_fini(void);
>> diff --git a/lib/eal/common/meson.build b/lib/eal/common/meson.build
>> index c1bbf26654..59d5b15708 100644
>> --- a/lib/eal/common/meson.build
>> +++ b/lib/eal/common/meson.build
>> @@ -27,6 +27,7 @@ sources += files(
>>          'eal_common_tailqs.c',
>>          'eal_common_thread.c',
>>          'eal_common_timer.c',
>> +        'eal_common_trace_pmu.c',
>>          'eal_common_trace_points.c',
>>          'eal_common_uuid.c',
>>          'malloc_elem.c',
>> diff --git a/lib/eal/include/rte_eal_trace.h
>> b/lib/eal/include/rte_eal_trace.h index 9ad2112801..9c78f63ff5 100644
>> --- a/lib/eal/include/rte_eal_trace.h
>> +++ b/lib/eal/include/rte_eal_trace.h
>> @@ -127,6 +127,17 @@ RTE_TRACE_POINT(
>>
>>  #define RTE_EAL_TRACE_GENERIC_FUNC
>> rte_eal_trace_generic_func(__func__)
>>
>> +#ifdef RTE_LIB_PMU
>> +#include <rte_pmu.h>
>> +
>> +RTE_TRACE_POINT_FP(
>> +    rte_eal_trace_pmu_read,
>> +    RTE_TRACE_POINT_ARGS(unsigned int index),
>> +    uint64_t val = rte_pmu_read(index);
>> +    rte_trace_point_emit_u64(val);
>> +)
>> +#endif
>> +
>>  #ifdef __cplusplus
>>  }
>>  #endif
>> diff --git a/lib/eal/version.map b/lib/eal/version.map index
>> f493cd1ca7..3dc147f848 100644
>> --- a/lib/eal/version.map
>> +++ b/lib/eal/version.map
>> @@ -399,6 +399,7 @@ EXPERIMENTAL {
>>
>>      # added in 24.11
>>      rte_bitset_to_str;
>> +    __rte_eal_trace_pmu_read; # WINDOWS_NO_EXPORT
>>  };
>>
>>  INTERNAL {
>> diff --git a/lib/pmu/rte_pmu.c b/lib/pmu/rte_pmu.c index
>> dd57961627..b65f08c75b 100644
>> --- a/lib/pmu/rte_pmu.c
>> +++ b/lib/pmu/rte_pmu.c
>> @@ -412,12 +412,75 @@ rte_pmu_add_event(const char *name)
>>      return event->index;
>>  }
>>
>> +static int
>> +add_events(const char *pattern)
>> +{
>> +    char *token, *copy;
>> +    int ret = 0;
>> +
>> +    copy = strdup(pattern);
>> +    if (copy == NULL)
>> +            return -ENOMEM;
>> +
>> +    token = strtok(copy, ",");
>> +    while (token) {
>> +            ret = rte_pmu_add_event(token);
>> +            if (ret < 0)
>> +                    break;
>> +
>> +            token = strtok(NULL, ",");
>> +    }
>> +
>> +    free(copy);
>> +
>> +    return ret >= 0 ? 0 : ret;
>> +}
>> +
>> +int
>> +rte_pmu_add_events_by_pattern(const char *pattern) {
>> +    regmatch_t rmatch;
>> +    char buf[BUFSIZ];
>> +    unsigned int num;
>> +    regex_t reg;
>> +    int ret;
>> +
>> +    /* events are matched against occurrences of e=ev1[,ev2,..] pattern */
>> +    ret = regcomp(&reg, "e=([_[:alnum:]-],?)+", REG_EXTENDED);
>> +    if (ret) {
>> +            PMU_LOG(ERR, "Failed to compile event matching regexp");
>> +            return -EINVAL;
>> +    }
>> +
>> +    for (;;) {
>> +            if (regexec(&reg, pattern, 1, &rmatch, 0))
>> +                    break;
>> +
>> +            num = rmatch.rm_eo - rmatch.rm_so;
>> +            if (num > sizeof(buf))
>> +                    num = sizeof(buf);
>> +
>> +            /* skip e= pattern prefix */
>> +            memcpy(buf, pattern + rmatch.rm_so + 2, num - 2);
>> +            buf[num - 2] = '\0';
>> +            ret = add_events(buf);
>> +            if (ret)
>> +                    break;
>> +
>> +            pattern += rmatch.rm_eo;
>> +    }
>> +
>> +    regfree(&reg);
>> +
>> +    return ret;
>> +}
>> +
>>  int
>>  rte_pmu_init(void)
>>  {
>>      int ret;
>>
>> -    if (rte_pmu.initialized)
>> +    if (rte_pmu.initialized && ++rte_pmu.initialized)
>
>Stupid q, why not:
>if (rte_pmu.initialized++ > 0) return 0?
>

If you call rte_pmu_init() and it fails, then rte_pmu.initialized stays at 1. 
It means that libpmu is initialized
but obviously that's not the case. On the other hand 1 means that there's one 
user, since this variable serves 
both as a flag and reference counter, which is true in fact. So only half of 
the story is true but we want the 
whole story to be true. 

Besides, if first call fails then second one will return early hence caller 
won’t be able to initialize libpmu but this time due to a different reason. 

And last thing, that expression resembles one used in rte_pmu_fini(). 

>Also increment/decrement counter implies that init()/fini() can be called 
>multiple times, correct?
>If so, there is still an implicit assumption that it always will happen in a 
>sequential manner?
>

Yes, that should happen in sequential manner. Frankly, that counter should stay 
at 1 for the most of the time.
The only exception is when you configure DPDK with -Denable_trace_fp=true and 
run functional test (pmu_autotest). 
Then that counter jumps to 2 during test and gets pulled down to 1 at the end. 

>>              return 0;
>>
>>      ret = scan_pmus();
>> @@ -450,7 +513,7 @@ rte_pmu_fini(void)
>>      struct rte_pmu_event_group *group;
>>      unsigned int i;
>>
>> -    if (!rte_pmu.initialized)
>> +    if (!rte_pmu.initialized || --rte_pmu.initialized)
>>              return;
>>
>>      RTE_TAILQ_FOREACH_SAFE(event, &rte_pmu.event_list, next, tmp_event)
>> { diff --git a/lib/pmu/rte_pmu.h b/lib/pmu/rte_pmu.h index
>> 85f9127911..df571f0a2f 100644
>> --- a/lib/pmu/rte_pmu.h
>> +++ b/lib/pmu/rte_pmu.h
>> @@ -135,7 +135,7 @@ __rte_pmu_enable_group(struct rte_pmu_event_group 
>> *group);
>>   * @warning
>>   * @b EXPERIMENTAL: this API may change without prior notice
>>   *
>> - * Initialize PMU library.
>> + * Initialize PMU library. It's safe to call it multiple times.
>>   *
>>   * @return
>>   *   0 in case of success, negative value otherwise.
>> @@ -148,7 +148,9 @@ rte_pmu_init(void);
>>   * @warning
>>   * @b EXPERIMENTAL: this API may change without prior notice
>>   *
>> - * Finalize PMU library.
>> + * Finalize PMU library. Number of calls must match number
>> + * of times rte_pmu_init() was called. Otherwise memory
>> + * won't be freed properly.
>>   */
>>  __rte_experimental
>>  void
>> @@ -173,7 +175,23 @@ rte_pmu_add_event(const char *name);
>>   * @warning
>>   * @b EXPERIMENTAL: this API may change without prior notice
>>   *
>> - * Read hardware counter configured to count occurrences of an event.
>> + * Add events matching pattern to the group of enabled events.
>> + *
>> + * @param pattern
>> + *   Pattern e=ev1[,ev2,...] matching events, where evX is a placeholder 
>> for an event listed
>under
>> + *   /sys/bus/event_source/devices/<pmu>/events.
>> + */
>> +__rte_experimental
>> +int
>> +rte_pmu_add_events_by_pattern(const char *pattern);
>> +
>> +/**
>> + * @warning
>> + * @b EXPERIMENTAL: this API may change without prior notice
>> + *
>> + * Read hardware counter configured to count occurrences of an event.
>> +This is called by an lcore
>> + * binded exclusively to particular cpu and may not work as expected if 
>> gets migrated elsewhere.
>> + * Reason being event group is pinned hence not supposed to be multiplexed 
>> with any other
>events.
>>   *
>>   * @param index
>>   *   Index of an event to be read.
>> diff --git a/lib/pmu/version.map b/lib/pmu/version.map index
>> d0f907d13d..f14d498b54 100644
>> --- a/lib/pmu/version.map
>> +++ b/lib/pmu/version.map
>> @@ -5,6 +5,7 @@ EXPERIMENTAL {
>>      __rte_pmu_enable_group;
>>      rte_pmu;
>>      rte_pmu_add_event;
>> +    rte_pmu_add_events_by_pattern;
>>      rte_pmu_fini;
>>      rte_pmu_init;
>>      rte_pmu_read;
>> --
>> 2.34.1

Reply via email to