Hi Joyce,

On Thu, Mar 18, 2021 at 07:20:22PM +0800, Joyce Kong wrote:
> If cache is enabled, objects will be retrieved/put from/to cache,
> subsequently from/to the common pool. Now the debug stats calculate
> the objects retrieved/put from/to cache and pool together, it is
> better to distinguish the data number from local cache and common
> pool.
This is indeed very useful information, thanks for proposing this.
Please see some comments below.

> Signed-off-by: Joyce Kong <joyce.k...@arm.com>
> ---
>  lib/librte_mempool/rte_mempool.c | 12 ++++++
>  lib/librte_mempool/rte_mempool.h | 64 ++++++++++++++++++++++----------
>  2 files changed, 57 insertions(+), 19 deletions(-)
>
> diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
> index afb1239c8..9cb69367a 100644
> --- a/lib/librte_mempool/rte_mempool.c
> +++ b/lib/librte_mempool/rte_mempool.c
> @@ -1244,8 +1244,14 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
>  	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>  		sum.put_bulk += mp->stats[lcore_id].put_bulk;
>  		sum.put_objs += mp->stats[lcore_id].put_objs;
> +		sum.put_objs_cache += mp->stats[lcore_id].put_objs_cache;
> +		sum.put_objs_pool += mp->stats[lcore_id].put_objs_pool;
> +		sum.put_objs_flush += mp->stats[lcore_id].put_objs_flush;
>  		sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk;
>  		sum.get_success_objs += mp->stats[lcore_id].get_success_objs;
> +		sum.get_success_objs_cache += mp->stats[lcore_id].get_success_objs_cache;
> +		sum.get_success_objs_pool += mp->stats[lcore_id].get_success_objs_pool;
> +		sum.get_success_objs_refill += mp->stats[lcore_id].get_success_objs_refill;
>  		sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk;
>  		sum.get_fail_objs += mp->stats[lcore_id].get_fail_objs;
>  		sum.get_success_blks += mp->stats[lcore_id].get_success_blks;
> @@ -1254,8 +1260,14 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
>  	fprintf(f, "  stats:\n");
>  	fprintf(f, "    put_bulk=%"PRIu64"\n", sum.put_bulk);
>  	fprintf(f, "    put_objs=%"PRIu64"\n", sum.put_objs);
> +	fprintf(f, "    put_objs_cache=%"PRIu64"\n", sum.put_objs_cache);
> +	fprintf(f, "    put_objs_pool=%"PRIu64"\n", sum.put_objs_pool);
> +	fprintf(f, "    put_objs_flush=%"PRIu64"\n", sum.put_objs_flush);
>  	fprintf(f, "    get_success_bulk=%"PRIu64"\n", sum.get_success_bulk);
>  	fprintf(f, "    get_success_objs=%"PRIu64"\n", sum.get_success_objs);
> +	fprintf(f, "    get_success_objs_cache=%"PRIu64"\n", sum.get_success_objs_cache);
> +	fprintf(f, "    get_success_objs_pool=%"PRIu64"\n", sum.get_success_objs_pool);
> +	fprintf(f, "    get_success_objs_refill=%"PRIu64"\n", sum.get_success_objs_refill);
>  	fprintf(f, "    get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk);
>  	fprintf(f, "    get_fail_objs=%"PRIu64"\n", sum.get_fail_objs);
>  	if (info.contig_block_size > 0) {
> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
> index c551cf733..29d80d97e 100644
> --- a/lib/librte_mempool/rte_mempool.h
> +++ b/lib/librte_mempool/rte_mempool.h
> @@ -66,12 +66,18 @@ extern "C" {
>   * A structure that stores the mempool statistics (per-lcore).
>   */
>  struct rte_mempool_debug_stats {
> -	uint64_t put_bulk;         /**< Number of puts. */
> -	uint64_t put_objs;         /**< Number of objects successfully put. */
> -	uint64_t get_success_bulk; /**< Successful allocation number. */
> -	uint64_t get_success_objs; /**< Objects successfully allocated. */
> -	uint64_t get_fail_bulk;    /**< Failed allocation number. */
> -	uint64_t get_fail_objs;    /**< Objects that failed to be allocated. */
> +	uint64_t put_bulk;                /**< Number of puts. */
> +	uint64_t put_objs;                /**< Number of objects successfully put. */
> +	uint64_t put_objs_cache;          /**< Number of objects successfully put to cache. */
> +	uint64_t put_objs_pool;           /**< Number of objects successfully put to pool. */
> +	uint64_t put_objs_flush;          /**< Number of flushing objects from cache to pool. */
> +	uint64_t get_success_bulk;        /**< Successful allocation number. */
> +	uint64_t get_success_objs;        /**< Objects successfully allocated. */
> +	uint64_t get_success_objs_cache;  /**< Objects successfully allocated from cache. */
> +	uint64_t get_success_objs_pool;   /**< Objects successfully allocated from pool. */
> +	uint64_t get_success_objs_refill; /**< Number of refilling objects from pool to cache. */
> +	uint64_t get_fail_bulk;           /**< Failed allocation number. */
> +	uint64_t get_fail_objs;           /**< Objects that failed to be allocated. */

What about having instead the following new stats:

- put_common_pool_bulk: number of bulks enqueued in common pool
- put_common_pool_objs: number of objects enqueued in common pool
- get_common_pool_bulk: number of bulks dequeued from common pool
- get_common_pool_objs: number of objects dequeued from common pool

It looks easier to me to understand, compared to flush/refill.
Especially, having 'objs' in the name makes me think that it counts a
number of objects, but it's not the case for the flush/refill counters.

>  	/** Successful allocation number of contiguous blocks. */
>  	uint64_t get_success_blks;
>  	/** Failed allocation number of contiguous blocks. */
> @@ -270,22 +276,34 @@ struct rte_mempool {
>   * Number to add to the object-oriented statistics.
>   */
>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> -#define __MEMPOOL_STAT_ADD(mp, name, n) do {                    \
> -		unsigned __lcore_id = rte_lcore_id();            \
> -		if (__lcore_id < RTE_MAX_LCORE) {                \
> +#define __MEMPOOL_STAT_ADD(mp, name, n) do {                            \
> +		unsigned __lcore_id = rte_lcore_id();                    \
> +		if (__lcore_id < RTE_MAX_LCORE) {                        \
>  			mp->stats[__lcore_id].name##_objs += n;          \
> -			mp->stats[__lcore_id].name##_bulk += 1;          \
> -		}                                                \
> -	} while(0)
> -#define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do {              \
> -		unsigned int __lcore_id = rte_lcore_id();        \
> -		if (__lcore_id < RTE_MAX_LCORE) {                \
> +			mp->stats[__lcore_id].name##_bulk += 1;          \
> +		}                                                        \
> +	} while (0)
> +#define __MEMPOOL_OBJS_STAT_ADD(mp, name1, name2, n) do {               \
> +		unsigned __lcore_id = rte_lcore_id();                    \
> +		if (__lcore_id < RTE_MAX_LCORE)                          \
> +			mp->stats[__lcore_id].name1##_objs_##name2 += n; \
> +	} while (0)
> +#define __MEMPOOL_OBJS_STAT_SUB(mp, name1, name2, n) do {               \
> +		unsigned __lcore_id = rte_lcore_id();                    \
> +		if (__lcore_id < RTE_MAX_LCORE)                          \
> +			mp->stats[__lcore_id].name1##_objs_##name2 -= n; \
> +	} while (0)
> +#define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do {              \
> +		unsigned int __lcore_id = rte_lcore_id();                \
> +		if (__lcore_id < RTE_MAX_LCORE) {                        \
>  			mp->stats[__lcore_id].name##_blks += n;          \
>  			mp->stats[__lcore_id].name##_bulk += 1;          \
> -		}                                                \
> +		}                                                        \
>  	} while (0)

There are too many stats macros. I think that the original
__MEMPOOL_STAT_ADD() macro can be reused if using the stat names I'm
suggesting.

Else, if not using names ending with _objs and _bulk, it would be better
to rework the macro into something more generic, like this:

#define __MEMPOOL_STAT_ADD(mp, name, n) do {                    \
		unsigned __lcore_id = rte_lcore_id();           \
		if (__lcore_id < RTE_MAX_LCORE)                 \
			mp->stats[__lcore_id].name += n;        \
	} while (0)

In this case, it could replace both __MEMPOOL_STAT_ADD() and
__MEMPOOL_CONTIG_BLOCKS_STAT_ADD(), and could go in a separate patch,
before this one. It would replace the 6 macro calls by 12, but would be
clearer because one can see the real field name. Optionally, the macro
could also take the lcore_id as a parameter.
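Just to illustrate what I mean (untested, only an example): with such a
generic macro, a call site that currently does
__MEMPOOL_STAT_ADD(mp, put, n) in __mempool_generic_put() would become
two explicit increments, one per field:

	__MEMPOOL_STAT_ADD(mp, put_bulk, 1);
	__MEMPOOL_STAT_ADD(mp, put_objs, n);

It is a bit more verbose, but the field that is incremented is visible
directly at the call site.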
>  #else
> -#define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0)
> +#define __MEMPOOL_STAT_ADD(mp, name, n) do {} while (0)
> +#define __MEMPOOL_OBJS_STAT_ADD(mp, name1, name2, n) do {} while (0)
> +#define __MEMPOOL_OBJS_STAT_SUB(mp, name1, nmae2, n) do {} while (0)
>  #define __MEMPOOL_CONTIG_BLOCKS_STAT_ADD(mp, name, n) do {} while (0)
>  #endif
>
> @@ -1305,10 +1323,13 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
>
>  	/* Add elements back into the cache */
>  	rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n);
> -
> +	__MEMPOOL_OBJS_STAT_ADD(mp, put, cache, n);
>  	cache->len += n;
>
>  	if (cache->len >= cache->flushthresh) {
> +		__MEMPOOL_OBJS_STAT_SUB(mp, put, cache, cache->len - cache->size);

I don't think it is a good idea to decrease a statistic counter.

If using put_common_pool_bulk/put_common_pool_objs, I think the code
could go in rte_mempool_ops_enqueue_bulk() (and in
rte_mempool_ops_dequeue_bulk() for the 'get' stats). See the sketch at
the end of this mail for what I have in mind.

> +		__MEMPOOL_OBJS_STAT_ADD(mp, put, pool, cache->len - cache->size);
> +		__MEMPOOL_OBJS_STAT_ADD(mp, put, flush, 1);
>  		rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],
>  				cache->len - cache->size);
>  		cache->len = cache->size;
> @@ -1318,6 +1339,7 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
>
>  ring_enqueue:
>
> +	__MEMPOOL_OBJS_STAT_ADD(mp, put, pool, n);
>  	/* push remaining objects in ring */
>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>  	if (rte_mempool_ops_enqueue_bulk(mp, obj_table, n) < 0)
> @@ -1437,6 +1459,7 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
>  			goto ring_dequeue;
>  		}
>
> +		__MEMPOOL_OBJS_STAT_ADD(mp, get_success, refill, 1);
>  		cache->len += req;
>  	}
>
> @@ -1447,6 +1470,7 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
>  	cache->len -= n;
>
>  	__MEMPOOL_STAT_ADD(mp, get_success, n);
> +	__MEMPOOL_OBJS_STAT_ADD(mp, get_success, cache, n);
>
>  	return 0;
>
> @@ -1457,8 +1481,10 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
>
>  	if (ret < 0)
>  		__MEMPOOL_STAT_ADD(mp, get_fail, n);
> -	else
> +	else {
>  		__MEMPOOL_STAT_ADD(mp, get_success, n);
> +		__MEMPOOL_OBJS_STAT_ADD(mp, get_success, pool, n);
> +	}
>
>  	return ret;
>  }
> --
> 2.30.0
>
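To make the suggestion about the ops layer more concrete, here is a
rough, untested sketch of the 'put' side, assuming the
put_common_pool_* field names and the generic __MEMPOOL_STAT_ADD()
discussed above (the real rte_mempool_ops_enqueue_bulk() contains more
than what is shown here):

static __rte_always_inline int
rte_mempool_ops_enqueue_bulk(struct rte_mempool *mp, void * const *obj_table,
		unsigned n)
{
	struct rte_mempool_ops *ops;

	/* Every enqueue reaching this point goes to the common pool,
	 * whether it comes from a cache flush or from a direct put,
	 * so count the bulk and the objects here. */
	__MEMPOOL_STAT_ADD(mp, put_common_pool_bulk, 1);
	__MEMPOOL_STAT_ADD(mp, put_common_pool_objs, n);

	ops = rte_mempool_get_ops(mp->ops_index);
	return ops->enqueue(mp, obj_table, n);
}

The 'get' side would be similar in rte_mempool_ops_dequeue_bulk(),
except that a dequeue can fail, so the counters should probably only be
incremented when the dequeue succeeds.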