Hi Olivier,

Thank you for your comments!
> On Apr 21, 2021, at 11:29 AM, Olivier Matz <olivier.m...@6wind.com> wrote:
>
> Hi Dharmik,
>
> Please see some comments below.
>
> On Mon, Apr 19, 2021 at 07:08:00PM -0500, Dharmik Thakkar wrote:
>> From: Joyce Kong <joyce.k...@arm.com>
>>
>> If cache is enabled, objects will be retrieved/put from/to cache,
>> subsequently from/to the common pool. Now the debug stats calculate
>> the objects retrieved/put from/to cache and pool together, it is
>> better to distinguish them.
>>
>> Signed-off-by: Joyce Kong <joyce.k...@arm.com>
>> Signed-off-by: Dharmik Thakkar <dharmik.thak...@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com>
>> Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
>> ---
>>  lib/librte_mempool/rte_mempool.c | 24 ++++++++++++++++
>>  lib/librte_mempool/rte_mempool.h | 47 ++++++++++++++++++++++----------
>>  2 files changed, 57 insertions(+), 14 deletions(-)
>>
>> diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
>> index afb1239c8d48..339f14455624 100644
>> --- a/lib/librte_mempool/rte_mempool.c
>> +++ b/lib/librte_mempool/rte_mempool.c
>> @@ -1244,6 +1244,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
>>          for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>>                  sum.put_bulk += mp->stats[lcore_id].put_bulk;
>>                  sum.put_objs += mp->stats[lcore_id].put_objs;
>> +                sum.put_common_pool_bulk +=
>> +                        mp->stats[lcore_id].put_common_pool_bulk;
>> +                sum.put_common_pool_objs +=
>> +                        mp->stats[lcore_id].put_common_pool_objs;
>> +                sum.put_cache_bulk += mp->stats[lcore_id].put_cache_bulk;
>> +                sum.put_cache_objs += mp->stats[lcore_id].put_cache_objs;
>> +                sum.get_common_pool_bulk +=
>> +                        mp->stats[lcore_id].get_common_pool_bulk;
>> +                sum.get_common_pool_objs +=
>> +                        mp->stats[lcore_id].get_common_pool_objs;
>> +                sum.get_cache_bulk += mp->stats[lcore_id].get_cache_bulk;
>> +                sum.get_cache_objs += mp->stats[lcore_id].get_cache_objs;
>>                  sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk;
>>                  sum.get_success_objs += mp->stats[lcore_id].get_success_objs;
>>                  sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk;
>> @@ -1254,6 +1266,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
>>          fprintf(f, "  stats:\n");
>>          fprintf(f, "    put_bulk=%"PRIu64"\n", sum.put_bulk);
>>          fprintf(f, "    put_objs=%"PRIu64"\n", sum.put_objs);
>> +        fprintf(f, "    put_common_pool_bulk=%"PRIu64"\n",
>> +                sum.put_common_pool_bulk);
>> +        fprintf(f, "    put_common_pool_objs=%"PRIu64"\n",
>> +                sum.put_common_pool_objs);
>> +        fprintf(f, "    put_cache_bulk=%"PRIu64"\n", sum.put_cache_bulk);
>> +        fprintf(f, "    put_cache_objs=%"PRIu64"\n", sum.put_cache_objs);
>> +        fprintf(f, "    get_common_pool_bulk=%"PRIu64"\n",
>> +                sum.get_common_pool_bulk);
>> +        fprintf(f, "    get_common_pool_objs=%"PRIu64"\n",
>> +                sum.get_common_pool_objs);
>> +        fprintf(f, "    get_cache_bulk=%"PRIu64"\n", sum.get_cache_bulk);
>> +        fprintf(f, "    get_cache_objs=%"PRIu64"\n", sum.get_cache_objs);
>>          fprintf(f, "    get_success_bulk=%"PRIu64"\n", sum.get_success_bulk);
>>          fprintf(f, "    get_success_objs=%"PRIu64"\n", sum.get_success_objs);
>>          fprintf(f, "    get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk);
>> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
>> index 848a19226149..0959f8a3f367 100644
>> --- a/lib/librte_mempool/rte_mempool.h
>> +++ b/lib/librte_mempool/rte_mempool.h
>> @@ -66,12 +66,20 @@ extern "C" {
>>   * A structure that stores the mempool statistics (per-lcore).
>>   */
>>  struct rte_mempool_debug_stats {
>> -        uint64_t put_bulk;             /**< Number of puts. */
>> -        uint64_t put_objs;             /**< Number of objects successfully put. */
>> -        uint64_t get_success_bulk;     /**< Successful allocation number. */
>> -        uint64_t get_success_objs;     /**< Objects successfully allocated. */
>> -        uint64_t get_fail_bulk;        /**< Failed allocation number. */
>> -        uint64_t get_fail_objs;        /**< Objects that failed to be allocated. */
>> +        uint64_t put_bulk;             /**< Number of puts. */
>> +        uint64_t put_objs;             /**< Number of objects successfully put. */
>> +        uint64_t put_common_pool_bulk; /**< Number of bulks enqueued in common pool. */
>> +        uint64_t put_common_pool_objs; /**< Number of objects enqueued in common pool. */
>> +        uint64_t put_cache_bulk;       /**< Number of bulks enqueued in cache. */
>> +        uint64_t put_cache_objs;       /**< Number of objects enqueued in cache. */
>> +        uint64_t get_common_pool_bulk; /**< Number of bulks dequeued from common pool. */
>> +        uint64_t get_common_pool_objs; /**< Number of objects dequeued from common pool. */
>> +        uint64_t get_cache_bulk;       /**< Number of bulks dequeued from cache. */
>> +        uint64_t get_cache_objs;       /**< Number of objects dequeued from cache. */
>> +        uint64_t get_success_bulk;     /**< Successful allocation number. */
>> +        uint64_t get_success_objs;     /**< Objects successfully allocated. */
>> +        uint64_t get_fail_bulk;        /**< Failed allocation number. */
>> +        uint64_t get_fail_objs;        /**< Objects that failed to be allocated. */
>
> I missed it the first time, but this changes the size of the
> rte_mempool_debug_stats structure. I think we don't care about this ABI
> breakage because this structure is only defined if
> RTE_LIBRTE_MEMPOOL_DEBUG is set. But just in case, adding Ray as Cc.

Agreed, thank you!

>
> About the field themselves, I'm not certain that there is an added value
> to have stats for cache gets and puts. My feeling is that the important
> stat to monitor is the access to common pool, because it is the one that
> highlights a possible performance impact (contention). The cache stats
> are more or less equal to "success + fail - common". Moreover, it will
> simplify the patch and avoid risks of mistakes.
>
> What do you think?

Yes, I think the cache stats can be removed. Also, please correct me if I'm
wrong, but in my understanding the cache stats are equal to "success - common".
Is adding "fail" required?
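To illustrate the arithmetic I have in mind, here is a rough sketch (for
discussion only, not something I intend to add to the patch) of how the cache
numbers could be derived from the counters that rte_mempool_dump() already
sums up in this patch:

        /*
         * Back-of-the-envelope estimate only: cache refills and flushes
         * mean these are only "more or less" equal, as you say. It also
         * assumes get_success_* is incremented on the cache path as well
         * (see below).
         */
        uint64_t approx_get_cache_objs =
                sum.get_success_objs - sum.get_common_pool_objs;
        uint64_t approx_put_cache_objs =
                sum.put_objs - sum.put_common_pool_objs;

Since a failed get does not actually dequeue any objects, I don't see where
"fail" would enter the object counts, hence my question.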
>
>>          /** Successful allocation number of contiguous blocks. */
>>          uint64_t get_success_blks;
>>          /** Failed allocation number of contiguous blocks. */
>> @@ -699,10 +707,18 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp,
>>                  void **obj_table, unsigned n)
>>  {
>>          struct rte_mempool_ops *ops;
>> +        int ret;
>>
>>          rte_mempool_trace_ops_dequeue_bulk(mp, obj_table, n);
>>          ops = rte_mempool_get_ops(mp->ops_index);
>> -        return ops->dequeue(mp, obj_table, n);
>> +        ret = ops->dequeue(mp, obj_table, n);
>> +        if (ret == 0) {
>> +                __MEMPOOL_STAT_ADD(mp, get_common_pool_bulk, 1);
>> +                __MEMPOOL_STAT_ADD(mp, get_common_pool_objs, n);
>> +                __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
>> +                __MEMPOOL_STAT_ADD(mp, get_success_objs, n);
>> +        }
>> +        return ret;
>>  }
>>
>>  /**
>> @@ -749,6 +765,8 @@ rte_mempool_ops_enqueue_bulk(struct rte_mempool *mp, void * const *obj_table,
>>  {
>>          struct rte_mempool_ops *ops;
>>
>> +        __MEMPOOL_STAT_ADD(mp, put_common_pool_bulk, 1);
>> +        __MEMPOOL_STAT_ADD(mp, put_common_pool_objs, n);
>>          rte_mempool_trace_ops_enqueue_bulk(mp, obj_table, n);
>>          ops = rte_mempool_get_ops(mp->ops_index);
>>          return ops->enqueue(mp, obj_table, n);
>> @@ -1297,14 +1315,18 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
>>
>>          /* Add elements back into the cache */
>>          rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n);
>> -
>>          cache->len += n;
>>
>> +        __MEMPOOL_STAT_ADD(mp, put_cache_bulk, 1);
>> +
>>          if (cache->len >= cache->flushthresh) {
>> +                __MEMPOOL_STAT_ADD(mp, put_cache_objs,
>> +                                n - (cache->len - cache->size));
>>                  rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],
>>                                  cache->len - cache->size);
>>                  cache->len = cache->size;
>> -        }
>> +        } else
>> +                __MEMPOOL_STAT_ADD(mp, put_cache_objs, n);
>>
>
> In case we keep cache stats, I'd add {} after the else to be consistent
> with the if().

Ack.

>
>>          return;
>>
>> @@ -1438,8 +1460,8 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
>>
>>          cache->len -= n;
>>
>> -        __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
>> -        __MEMPOOL_STAT_ADD(mp, get_success_objs, n);
>> +        __MEMPOOL_STAT_ADD(mp, get_cache_bulk, 1);
>> +        __MEMPOOL_STAT_ADD(mp, get_cache_objs, n);
>
> In case we keep cache stats, I don't think we should remove get_success
> stats increment. Else, the success stats will never be incremented when
> retrieving objects from the cache.
>

Good catch. Thanks!

>
>>
>>          return 0;
>>
>> @@ -1451,9 +1473,6 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
>>          if (ret < 0) {
>>                  __MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1);
>>                  __MEMPOOL_STAT_ADD(mp, get_fail_objs, n);
>> -        } else {
>> -                __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
>> -                __MEMPOOL_STAT_ADD(mp, get_success_objs, n);
>>          }
>>
>>          return ret;
>> --
>> 2.17.1
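Just so we are on the same page, this is roughly how I understand the cache-hit
path in __mempool_generic_get() should look if the cache counters were kept
(an untested sketch based on the hunk quoted above):

        /*
         * Serve the request from the cache and count it both as a cache
         * hit and as a successful allocation, so that get_success_* keeps
         * covering every successful get regardless of where it was served
         * from.
         */
        cache->len -= n;

        __MEMPOOL_STAT_ADD(mp, get_cache_bulk, 1);
        __MEMPOOL_STAT_ADD(mp, get_cache_objs, n);
        __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
        __MEMPOOL_STAT_ADD(mp, get_success_objs, n);

        return 0;

If the cache counters are dropped as discussed above, only the get_success_*
increments remain here, i.e. the original code.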