On Wed, Feb 26, 2025 at 03:59:22PM +0000, Morten Brørup wrote:
> The comparisons lcore_id < RTE_MAX_LCORE and lcore_id != LCORE_ID_ANY are
> equivalent, but the latter compiles to fewer bytes of code space.
> Similarly for lcore_id >= RTE_MAX_LCORE and lcore_id == LCORE_ID_ANY.
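
A note for readers less familiar with the lcore API: the equivalence holds
because rte_lcore_id() only ever returns a valid index in [0, RTE_MAX_LCORE)
or the sentinel LCORE_ID_ANY (UINT32_MAX). A minimal sketch of that invariant
(the helper name is mine, not from the patch):

    #include <stdbool.h>

    #include <rte_lcore.h>

    /* Illustration only: both tests classify every legal return value
     * of rte_lcore_id() identically. */
    static inline bool
    lcore_id_checks_agree(void)
    {
            unsigned int lcore_id = rte_lcore_id();
            bool valid_by_range = lcore_id < RTE_MAX_LCORE;     /* old test */
            bool valid_by_sentinel = lcore_id != LCORE_ID_ANY;  /* new test */

            return valid_by_range == valid_by_sentinel;         /* always true */
    }
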
> 
> The rte_mempool_get_ops() function is also used in the fast path, so
> RTE_VERIFY() was replaced by RTE_ASSERT().
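
For reference, the practical difference between the two macros (paraphrased
from memory from rte_debug.h, not a verbatim copy): RTE_VERIFY() is always
compiled in and panics on failure, while RTE_ASSERT() only expands to that
check when RTE_ENABLE_ASSERT is defined, so it costs nothing on the fast
path of a normal build.

    /* Always present: panic at runtime if the condition fails. */
    #define RTE_VERIFY(exp) do {                                     \
            if (unlikely(!(exp)))                                    \
                    rte_panic("assert \"%s\" failed\n", #exp);       \
    } while (0)

    /* Checked only in builds with RTE_ENABLE_ASSERT defined. */
    #ifdef RTE_ENABLE_ASSERT
    #define RTE_ASSERT(exp) RTE_VERIFY(exp)
    #else
    #define RTE_ASSERT(exp) do {} while (0)
    #endif
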
> 
> Compilers implicitly consider comparisons of variable == 0 likely, so
> unlikely() was added to the check for no mempool cache (mp->cache_size ==
> 0) in the rte_mempool_default_cache() function.
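
(unlikely() is the usual __builtin_expect() wrapper from
rte_branch_prediction.h, so the hint simply overrides that default
prediction for this particular test; paraphrased sketch:)

    /* Paraphrased from rte_branch_prediction.h. */
    #define unlikely(x) __builtin_expect(!!(x), 0)

    /* The hinted check in rte_mempool_default_cache(): treat cache-less
     * mempools as the exception on the fast path. */
    if (unlikely(mp->cache_size == 0))
            return NULL;
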
> 
> The rte_mempool_do_generic_put() function for adding objects to a mempool
> was refactored as follows:
> - The comparison for the request itself being too big, which is considered
>   unlikely, was moved down and out of the code path where the cache has
>   sufficient room for the added objects, which is considered the most
>   likely code path.
> - Added __rte_assume() about the cache length, size and threshold, for
>   compiler optimization when "n" is compile time constant.
> - Added __rte_assume() about "ret" being zero, so other functions using
>   the value returned by this function can be potentially optimized by the
>   compiler; especially when it merges multiple sequential code paths of
>   inlined code depending on the return value being either zero or
>   negative.
> - The refactored source code (with comments) made the separate comment
>   describing the cache flush/add algorithm superfluous, so it was removed.
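
A rough sketch of the pattern described in the two __rte_assume() bullets
above (the exact conditions are my guess, not copied from the patch; the
field names are those of struct rte_mempool_cache):

    /* Invariants the compiler cannot infer on its own; with a
     * compile-time constant "n" they allow the flush/add branches to be
     * folded away. */
    __rte_assume(cache->len <= cache->flushthresh);
    __rte_assume(cache->size <= RTE_MEMPOOL_CACHE_MAX_SIZE);

    /* On the success path, promise that the reported status is exactly
     * 0, so inlined callers can drop dead error-handling paths. */
    __rte_assume(ret == 0);
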
> 
> A few more likely()/unlikely() were added.

In general, I'm not a big fan of using likely/unlikely, but if they give a
perf benefit, we should probably take them.

A few more comments inline below.

> A few comments were improved for readability.
> 
> Some assertions, RTE_ASSERT(), were added. Most importantly to assert that
> the return values of the mempool drivers' enqueue and dequeue operations
> are API compliant, i.e. 0 (for success) or negative (for failure), and
> never positive.
> 
> Signed-off-by: Morten Brørup <m...@smartsharesystems.com>

Acked-by: Bruce Richardson <bruce.richard...@intel.com>

> ---
>  lib/mempool/rte_mempool.h | 67 ++++++++++++++++++++++-----------------
>  1 file changed, 38 insertions(+), 29 deletions(-)
> 
> diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h
> index c495cc012f..aedc100964 100644
> --- a/lib/mempool/rte_mempool.h
> +++ b/lib/mempool/rte_mempool.h
> @@ -334,7 +334,7 @@ struct __rte_cache_aligned rte_mempool {
>  #ifdef RTE_LIBRTE_MEMPOOL_STATS
>  #define RTE_MEMPOOL_STAT_ADD(mp, name, n) do {                                 \
>               unsigned int __lcore_id = rte_lcore_id();                        \
> -             if (likely(__lcore_id < RTE_MAX_LCORE))                          \
> +             if (likely(__lcore_id != LCORE_ID_ANY))                          \

Is this not opening up the possibility of runtime crashes if __lcore_id is
invalid? I see from the commit log that the change in comparison results in
smaller code gen, but it does leave undefined behaviour when
__lcore_id == 500, for example.

>                       (mp)->stats[__lcore_id].name += (n);                     \
>               else                                                             \
>                       rte_atomic_fetch_add_explicit(&((mp)->stats[RTE_MAX_LCORE].name),  \
> @@ -751,7 +751,7 @@ extern struct rte_mempool_ops_table rte_mempool_ops_table;
>  static inline struct rte_mempool_ops *
>  rte_mempool_get_ops(int ops_index)
>  {
> -     RTE_VERIFY((ops_index >= 0) && (ops_index < RTE_MEMPOOL_MAX_OPS_IDX));
> +     RTE_ASSERT((ops_index >= 0) && (ops_index < RTE_MEMPOOL_MAX_OPS_IDX));
>  
>       return &rte_mempool_ops_table.ops[ops_index];
>  }
> @@ -791,7 +791,8 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp,
>       rte_mempool_trace_ops_dequeue_bulk(mp, obj_table, n);
>       ops = rte_mempool_get_ops(mp->ops_index);
>       ret = ops->dequeue(mp, obj_table, n);
> -     if (ret == 0) {
> +     RTE_ASSERT(ret <= 0);
> +     if (likely(ret == 0)) {
>               RTE_MEMPOOL_STAT_ADD(mp, get_common_pool_bulk, 1);
>               RTE_MEMPOOL_STAT_ADD(mp, get_common_pool_objs, n);
>       }
> @@ -816,11 +817,14 @@ rte_mempool_ops_dequeue_contig_blocks(struct rte_mempool *mp,
>               void **first_obj_table, unsigned int n)
>  {
>       struct rte_mempool_ops *ops;
> +     int ret;
>  
>       ops = rte_mempool_get_ops(mp->ops_index);
>       RTE_ASSERT(ops->dequeue_contig_blocks != NULL);
>       rte_mempool_trace_ops_dequeue_contig_blocks(mp, first_obj_table, n);
> -     return ops->dequeue_contig_blocks(mp, first_obj_table, n);
> +     ret = ops->dequeue_contig_blocks(mp, first_obj_table, n);
> +     RTE_ASSERT(ret <= 0);
> +     return ret;
>  }
>  
>  /**
> @@ -848,6 +852,7 @@ rte_mempool_ops_enqueue_bulk(struct rte_mempool *mp, void * const *obj_table,
>       rte_mempool_trace_ops_enqueue_bulk(mp, obj_table, n);
>       ops = rte_mempool_get_ops(mp->ops_index);
>       ret = ops->enqueue(mp, obj_table, n);
> +     RTE_ASSERT(ret <= 0);
>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>       if (unlikely(ret < 0))
>               RTE_MEMPOOL_LOG(CRIT, "cannot enqueue %u objects to mempool %s",
> @@ -1333,10 +1338,10 @@ rte_mempool_cache_free(struct rte_mempool_cache *cache);
>  static __rte_always_inline struct rte_mempool_cache *
>  rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
>  {
> -     if (mp->cache_size == 0)
> +     if (unlikely(mp->cache_size == 0))
>               return NULL;
>  
> -     if (lcore_id >= RTE_MAX_LCORE)
> +     if (unlikely(lcore_id == LCORE_ID_ANY))
>               return NULL;
>  

Again, I'd be concerned about the resiliency of this. But I suppose having
an invalid lcore id is just asking for problems and crashes later.

>       rte_mempool_trace_default_cache(mp, lcore_id,
> @@ -1383,32 +1388,33 @@ rte_mempool_do_generic_put(struct rte_mempool *mp, void * const *obj_table,
>  {
