> From: Morten Brørup
> Sent: Saturday, 5 November 2022 14.19
> 
> Zero-copy access to the mempool cache is beneficial for PMD
> performance, and must be provided by the mempool library to fix [Bug
> 1052] without a performance regression.
> 
> [Bug 1052]: https://bugs.dpdk.org/show_bug.cgi?id=1052
> 
> 
> This RFC offers two conceptual variants of zero-copy get:
> 1. A simple version.
> 2. A version where existing (hot) objects in the cache are moved to the
> top of the cache before new objects from the backend driver are pulled
> in.
> 
> I would like some early feedback. Also, which variant do you prefer?
> 
> Notes:
> * Allowing the 'cache' parameter to be NULL, and getting it from the
> mempool instead, was inspired by rte_mempool_cache_flush().

"instead" -> "in this case"

> * Asserting that the 'mp' parameter is not NULL is not done by other
> functions, so I omitted it here too.
> 
> NB: Please ignore formatting. Also, this code has not even been compile
> tested.

And I just spotted an error: the rte_memcpy() length field must be multiplied 
by sizeof(void*).

> 
> 1. Simple version:
> 
> /**
>  * Get objects from a mempool via zero-copy access to a user-owned
> mempool cache.
>  *
>  * @param cache
>  *   A pointer to the mempool cache.
>  * @param mp
>  *   A pointer to the mempool.
>  * @param n
>  *   The number of objects to prefetch into the mempool cache.
>  * @return
>  *   The pointer to the objects in the mempool cache.
>  *   NULL on error
>  *   with rte_errno set appropriately.
>  */
> static __rte_always_inline void *
> rte_mempool_cache_get_bulk(struct rte_mempool_cache *cache,
>         struct rte_mempool *mp,
>         unsigned int n)
> {
>     unsigned int len;
> 
>     if (cache == NULL)
>         cache = rte_mempool_default_cache(mp, rte_lcore_id());
>     if (cache == NULL) {
>         rte_errno = EINVAL;
>         goto fail;
>     }
> 
>     rte_mempool_trace_cache_get_bulk(cache, mp, n);
> 
>     len = cache->len;
> 
>     if (unlikely(n > len)) {
>         unsigned int size;
> 
>         if (unlikely(n > RTE_MEMPOOL_CACHE_MAX_SIZE)) {
>             rte_errno = EINVAL;
>             goto fail;
>         }
> 
>         /* Fill the cache from the backend; fetch size + requested -
> len objects. */
>         size = cache->size;
> 
>         ret = rte_mempool_ops_dequeue_bulk(mp, &cache->objs[len], size
> + n - len);
>         if (unlikely(ret < 0)) {
>             /*
>              * We are buffer constrained.
>              * Do not fill the cache, just satisfy the request.
>              */
>             ret = rte_mempool_ops_dequeue_bulk(mp, &cache->objs[len], n
> - len);
>             if (unlikely(ret < 0)) {
>                 rte_errno = -ret;
>                 goto fail;
>             }
> 
>             len = 0;
>         } else
>             len = size;
>     } else
>         len -= n;
> 
>     cache->len = len;
> 
>     RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1);
>     RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n);
> 
>     return &cache->objs[len];
> 
> fail:
> 
>     RTE_MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1);
>     RTE_MEMPOOL_STAT_ADD(mp, get_fail_objs, n);
> 
>     return NULL;
> }
> 
> 
> 2. Advanced version:
> 
> /**
>  * Get objects from a mempool via zero-copy access to a user-owned
> mempool cache.
>  *
>  * @param cache
>  *   A pointer to the mempool cache.
>  * @param mp
>  *   A pointer to the mempool.
>  * @param n
>  *   The number of objects to prefetch into the mempool cache.
>  * @return
>  *   The pointer to the objects in the mempool cache.
>  *   NULL on error
>  *   with rte_errno set appropriately.
>  */
> static __rte_always_inline void *
> rte_mempool_cache_get_bulk(struct rte_mempool_cache *cache,
>         struct rte_mempool *mp,
>         unsigned int n)
> {
>     unsigned int len;
> 
>     if (cache == NULL)
>         cache = rte_mempool_default_cache(mp, rte_lcore_id());
>     if (cache == NULL) {
>         rte_errno = EINVAL;
>         goto fail;
>     }
> 
>     rte_mempool_trace_cache_get_bulk(cache, mp, n);
> 
>     len = cache->len;
> 
>     if (unlikely(n > len)) {
>         unsigned int size;
> 
>         if (unlikely(n > RTE_MEMPOOL_CACHE_MAX_SIZE)) {
>             rte_errno = EINVAL;
>             goto fail;
>         }
> 
>         /* Fill the cache from the backend; fetch size + requested -
> len objects. */
>         size = cache->size;
> 
>         if (likely(size + n >= 2 * len)) {
>             /*
>              * No overlap when copying (dst >= len): size + n - len >=
> len.
>              * Move (i.e. copy) the existing objects in the cache to
> the
>              * coming top of the cache, to make room for new objects
> below.
>              */
>             rte_memcpy(&cache->objs[size + n - len], &cache->objs[0],
> len);

Length is bytes, not number of objects, so that should be:

rte_memcpy(&cache->objs[size + n - len], &cache->objs[0], len * sizeof(void*));

> 
>             /* Fill the cache below the existing objects in the cache.
> */
>             ret = rte_mempool_ops_dequeue_bulk(mp, &cache->objs[0],
> size + n - len);
>             if (unlikely(ret < 0)) {
>                 goto constrained;
>             } else
>                 len = size;
>         } else {
>             /* Fill the cache on top of any objects in it. */
>             ret = rte_mempool_ops_dequeue_bulk(mp, &cache->objs[len],
> size + n - len);
>             if (unlikely(ret < 0)) {
> 
> constrained:
>                 /*
>                  * We are buffer constrained.
>                  * Do not fill the cache, just satisfy the request.
>                  */
>                 ret = rte_mempool_ops_dequeue_bulk(mp, &cache-
> >objs[len], n - len);
>                 if (unlikely(ret < 0)) {
>                     rte_errno = -ret;
>                     goto fail;
>                 }
> 
>                 len = 0;
>             } else
>                 len = size;
>         }
>     } else
>         len -= n;
> 
>     cache->len = len;
> 
>     RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1);
>     RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n);
> 
>     return &cache->objs[len];
> 
> fail:
> 
>     RTE_MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1);
>     RTE_MEMPOOL_STAT_ADD(mp, get_fail_objs, n);
> 
>     return NULL;
> }
> 
> 
> Med venlig hilsen / Kind regards,
> -Morten Brørup

Reply via email to