On 16/12/2022 10:21, Robin Jarry wrote:
Report the same information as rte_lcore_dump() in the telemetry
API through /eal/lcore/list and /eal/lcore/info,ID.
Example:
--> /eal/lcore/info,3
{
"/eal/lcore/info": {
"lcore_id": 3,
"socket": 0,
"role": "RTE",
"cpuset": [
3
]
}
}
Signed-off-by: Robin Jarry <rja...@redhat.com>
Acked-by: Morten Brørup <m...@smartsharesystems.com>
---
v4 -> v5: No change
lib/eal/common/eal_common_lcore.c | 96 +++++++++++++++++++++++++++++++
1 file changed, 96 insertions(+)
diff --git a/lib/eal/common/eal_common_lcore.c
b/lib/eal/common/eal_common_lcore.c
index 06c594b0224f..16548977dce8 100644
--- a/lib/eal/common/eal_common_lcore.c
+++ b/lib/eal/common/eal_common_lcore.c
@@ -10,6 +10,9 @@
#include <rte_errno.h>
#include <rte_lcore.h>
#include <rte_log.h>
+#ifndef RTE_EXEC_ENV_WINDOWS
+#include <rte_telemetry.h>
+#endif
#include "eal_private.h"
#include "eal_thread.h"
@@ -456,3 +459,96 @@ rte_lcore_dump(FILE *f)
{
rte_lcore_iterate(lcore_dump_cb, f);
}
+
+#ifndef RTE_EXEC_ENV_WINDOWS
+/* Iteration callback: append lcore_id to the telemetry array passed in arg. */
+static int
+lcore_telemetry_id_cb(unsigned int lcore_id, void *arg)
+{
+	return rte_tel_data_add_array_int((struct rte_tel_data *)arg, lcore_id);
+}
+
+/*
+ * Telemetry handler registered for "/eal/lcore/list": turn d into an
+ * integer array and let lcore_telemetry_id_cb() append one id per lcore
+ * visited by rte_lcore_iterate().
+ */
+static int
+handle_lcore_list(const char *cmd __rte_unused,
+		const char *params __rte_unused,
+		struct rte_tel_data *d)
+{
+	int rc;
+
+	rc = rte_tel_data_start_array(d, RTE_TEL_INT_VAL);
+	if (rc != 0)
+		return rc;
+
+	return rte_lcore_iterate(lcore_telemetry_id_cb, d);
+}
+
+struct lcore_telemetry_info { /* argument handed to lcore_telemetry_info_cb() */
+ unsigned int lcore_id; /* id the callback compares each iterated lcore against */
+ struct rte_tel_data *d; /* telemetry container the callback fills for that lcore */
+};
+
+/*
+ * rte_lcore_iterate() callback: when lcore_id is the one requested in
+ * arg (a struct lcore_telemetry_info), fill info->d with a dict holding
+ * the lcore id, its socket, its role and the list of CPUs in its cpuset.
+ *
+ * Returns 0 when the lcore is skipped or reported, -ENOMEM when the
+ * cpuset container cannot be allocated, or the negative error returned
+ * when the container cannot be attached to the dict.
+ */
+static int
+lcore_telemetry_info_cb(unsigned int lcore_id, void *arg)
+{
+	struct lcore_telemetry_info *info = arg;
+	struct rte_config *cfg = rte_eal_get_configuration();
+	struct rte_tel_data *cpuset;
+	const char *role;
+	unsigned int cpu;
+	int ret;
+
+	/* Not the requested lcore: keep iterating. */
+	if (info->lcore_id != lcore_id)
+		return 0;
+
+	switch (cfg->lcore_role[lcore_id]) {
+	case ROLE_RTE:
+		role = "RTE";
+		break;
+	case ROLE_SERVICE:
+		role = "SERVICE";
+		break;
+	case ROLE_NON_EAL:
+		role = "NON_EAL";
+		break;
+	default:
+		role = "UNKNOWN";
+		break;
+	}
+	rte_tel_data_start_dict(info->d);
+	rte_tel_data_add_dict_int(info->d, "lcore_id", lcore_id);
+	rte_tel_data_add_dict_int(info->d, "socket",
+			rte_lcore_to_socket_id(lcore_id));
+	rte_tel_data_add_dict_string(info->d, "role", role);
+	cpuset = rte_tel_data_alloc();
+	if (cpuset == NULL)
+		return -ENOMEM;
+	rte_tel_data_start_array(cpuset, RTE_TEL_INT_VAL);
+	for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+		if (CPU_ISSET(cpu, &lcore_config[lcore_id].cpuset))
+			rte_tel_data_add_array_int(cpuset, cpu);
+	}
+	/*
+	 * NOTE(review): assumes a negative return means the container was
+	 * not attached to the dict and is still ours to release - confirm
+	 * against the telemetry library.
+	 */
+	ret = rte_tel_data_add_dict_container(info->d, "cpuset", cpuset, 0);
+	if (ret < 0) {
+		rte_tel_data_free(cpuset);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Telemetry handler registered for "/eal/lcore/info".
+ *
+ * params must be a decimal lcore id and nothing else. Returns -EINVAL
+ * when params is missing, empty, not a number, followed by trailing
+ * characters, or not below RTE_MAX_LCORE; returns -errno when strtoul()
+ * reports an error. Otherwise returns the result of iterating over the
+ * lcores with lcore_telemetry_info_cb(), which fills d for the matching
+ * lcore.
+ */
+static int
+handle_lcore_info(const char *cmd __rte_unused, const char *params,
+		struct rte_tel_data *d)
+{
+	struct lcore_telemetry_info info = { .d = d };
+	unsigned long lcore_id;
+	char *endptr = NULL;
+
+	if (params == NULL || *params == '\0')
+		return -EINVAL;
+
+	errno = 0;
+	lcore_id = strtoul(params, &endptr, 10);
+	if (errno != 0)
+		return -errno;
+	/*
+	 * Check the range on the unsigned long, before it is narrowed to
+	 * unsigned int, so that huge or negative inputs cannot alias a
+	 * valid lcore id.
+	 */
+	if (endptr == params || *endptr != '\0' || lcore_id >= RTE_MAX_LCORE)
+		return -EINVAL;
+
+	info.lcore_id = lcore_id;
+	return rte_lcore_iterate(lcore_telemetry_info_cb, &info);
+}
+
+/* Register the lcore list and lcore info commands with the telemetry library. */
+RTE_INIT(lcore_telemetry)
+{
+	rte_telemetry_register_cmd("/eal/lcore/list", handle_lcore_list,
+			"List of lcore ids. Takes no parameters");
+	rte_telemetry_register_cmd("/eal/lcore/info", handle_lcore_info,
+			"Returns lcore info. Parameters: int lcore_id");
+}
+#endif /* !RTE_EXEC_ENV_WINDOWS */
Hi Robin,
Thanks for taking the time to work on this. It is a good implementation
for debug use-cases.
I have 2 suggestions which would improve the usability of the data:
1. Could we make the lcore_id parameter on /eal/lcore/info optional?
This would allow users to read info for all lcores in the application at
once.
2. Could we add 2 additional telemetry endpoints? One which returns an
array of busy_cycles values and the other returns an array of
total_cycles values. These arrays could be used in conjunction with the
/eal/lcore/list endpoint to quickly read the usage related metrics. I've
included an example diff below [1].
We have a use-case beyond debugging in which we read telemetry every few
milliseconds. From a performance point of view, adding the 2 additional
endpoints would be very beneficial.
Thanks,
Kevin
[1]
diff --git a/lib/eal/common/eal_common_lcore.c
b/lib/eal/common/eal_common_lcore.c
index 210636d21d..94ddb276c5 100644
--- a/lib/eal/common/eal_common_lcore.c
+++ b/lib/eal/common/eal_common_lcore.c
@@ -569,6 +569,32 @@ handle_lcore_info(const char *cmd __rte_unused,
const char *params, struct rte_t
return rte_lcore_iterate(lcore_telemetry_info_cb, &info);
}
+/*
+ * rte_lcore_iterate() callback: append the busy cycle count of lcore_id
+ * to the u64 telemetry array passed in arg. When no usage callback is
+ * set, or the callback returns non-zero, 0 is appended instead.
+ */
+static int
+lcore_telemetry_busy_cycles_cb(unsigned int lcore_id, void *arg)
+{
+	struct rte_tel_data *d = arg;
+	struct rte_lcore_usage usage;
+	rte_lcore_usage_cb usage_cb;
+	/* Same width as the u64 array entry: no truncation where long is 32-bit. */
+	uint64_t cycles = 0;
+
+	memset(&usage, 0, sizeof(usage));
+	/* Read the pointer once so the NULL check and the call use the same value. */
+	usage_cb = lcore_usage_cb;
+	if (usage_cb != NULL && usage_cb(lcore_id, &usage) == 0)
+		cycles = usage.busy_cycles;
+
+	return rte_tel_data_add_array_u64(d, cycles);
+}
+
+/*
+ * Telemetry handler registered for "/eal/lcore/busy_cycles": turn d into
+ * a u64 array and let lcore_telemetry_busy_cycles_cb() append one value
+ * per lcore visited by rte_lcore_iterate().
+ */
+static int
+handle_lcore_busy_cycles(const char *cmd __rte_unused,
+		const char *params __rte_unused, struct rte_tel_data *d)
+{
+	int rc;
+
+	rc = rte_tel_data_start_array(d, RTE_TEL_U64_VAL);
+	if (rc != 0)
+		return rc;
+
+	return rte_lcore_iterate(lcore_telemetry_busy_cycles_cb, d);
+}
+
RTE_INIT(lcore_telemetry)
{
rte_telemetry_register_cmd(
@@ -577,5 +603,8 @@ RTE_INIT(lcore_telemetry)
rte_telemetry_register_cmd(
"/eal/lcore/info", handle_lcore_info,
"Returns lcore info. Parameters: int lcore_id");
+ rte_telemetry_register_cmd(
+ "/eal/lcore/busy_cycles", handle_lcore_busy_cycles,
+ "List of busy cycle values. Takes no parameters");
}
#endif /* !RTE_EXEC_ENV_WINDOWS */