On 2022-10-31 12:26, Morten Brørup wrote:
Offset the stats array index by one, and count non-DPDK threads at index
zero.
This patch provides two benefits:
* Non-DPDK threads are also included in the statistics.
* A conditional in the fast path is removed. Static branch prediction was
already correct for that branch, so the performance improvement is negligible.
v2:
* New. No v1 of this patch in the series.
Suggested-by: Stephen Hemminger <step...@networkplumber.org>
Signed-off-by: Morten Brørup <m...@smartsharesystems.com>
---
lib/mempool/rte_mempool.c | 2 +-
lib/mempool/rte_mempool.h | 12 ++++++------
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/lib/mempool/rte_mempool.c b/lib/mempool/rte_mempool.c
index 62d1ce764e..e6208125e0 100644
--- a/lib/mempool/rte_mempool.c
+++ b/lib/mempool/rte_mempool.c
@@ -1272,7 +1272,7 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
#ifdef RTE_LIBRTE_MEMPOOL_STATS
rte_mempool_ops_get_info(mp, &info);
memset(&sum, 0, sizeof(sum));
- for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE + 1; lcore_id++) {
sum.put_bulk += mp->stats[lcore_id].put_bulk;
sum.put_objs += mp->stats[lcore_id].put_objs;
sum.put_common_pool_bulk +=
mp->stats[lcore_id].put_common_pool_bulk;
diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h
index 9c4bf5549f..16e7e62e3c 100644
--- a/lib/mempool/rte_mempool.h
+++ b/lib/mempool/rte_mempool.h
@@ -238,8 +238,11 @@ struct rte_mempool {
struct rte_mempool_memhdr_list mem_list; /**< List of memory chunks */
#ifdef RTE_LIBRTE_MEMPOOL_STATS
- /** Per-lcore statistics. */
- struct rte_mempool_debug_stats stats[RTE_MAX_LCORE];
+ /** Per-lcore statistics.
+ *
+ * Offset by one, to include non-DPDK threads.
+ */
+ struct rte_mempool_debug_stats stats[RTE_MAX_LCORE + 1];
#endif
} __rte_cache_aligned;
@@ -304,10 +307,7 @@ struct rte_mempool {
*/
#ifdef RTE_LIBRTE_MEMPOOL_STATS
#define RTE_MEMPOOL_STAT_ADD(mp, name, n) do { \
- unsigned __lcore_id = rte_lcore_id(); \
- if (__lcore_id < RTE_MAX_LCORE) { \
- mp->stats[__lcore_id].name += n; \
- } \
+ (mp)->stats[rte_lcore_id() + 1].name += n; \
This relies on LCORE_ID_ANY being UINT32_MAX, so that adding one wraps to 0
for an unregistered non-EAL thread? That might be worth a comment or, better,
a rewrite with an explicit LCORE_ID_ANY comparison.
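You need a conditional anyway. An atomic add must be used in the
unregistered non-EAL thread case, because all such threads share the
statistics slot at index zero and plain increments there can race.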
} while (0)
#else
#define RTE_MEMPOOL_STAT_ADD(mp, name, n) do {} while (0)