On Fri, Oct 11, 2019 at 02:09:47PM -0300, Flavio Leitner wrote:
> The rte_vhost_dequeue_burst supports two ways of dequeuing data.
> If the data fits into a buffer, then all data is copied and a
> single linear buffer is returned. Otherwise it allocates
> additional mbufs and chains them together to return a multiple
> segments mbuf.
> 
> While that covers most use cases, it forces applications that
> need to work with larger data sizes to support multiple segments
> mbufs. The non-linear characteristic brings complexity and
> performance implications to the application.
> 
> To resolve the issue, add support to attach external buffer
> to a pktmbuf and let the host provide during registration if
> attaching an external buffer to pktmbuf is supported and if
> only linear buffer are supported.
> 
> Signed-off-by: Flavio Leitner <f...@sysclose.org>
> ---
>  doc/guides/prog_guide/vhost_lib.rst |  35 +++++++++
>  lib/librte_vhost/rte_vhost.h        |   4 ++
>  lib/librte_vhost/socket.c           |  22 ++++++
>  lib/librte_vhost/vhost.c            |  22 ++++++
>  lib/librte_vhost/vhost.h            |   4 ++
>  lib/librte_vhost/virtio_net.c       | 108 ++++++++++++++++++++++++----
>  6 files changed, 181 insertions(+), 14 deletions(-)

Thanks for the new version!
Overall looks good to me, few minor comments inlined.


> +static int
> +virtio_dev_extbuf_alloc(struct rte_mbuf *pkt, uint16_t size)
> +{
> +     struct rte_mbuf_ext_shared_info *shinfo = NULL;
> +     uint16_t buf_len = 0;
> +     rte_iova_t iova;
> +     void *buf;
> +
> +     /* Try to use pkt buffer to store shinfo to reduce the amount of memory
> +      * required, otherwise store shinfo in the new buffer.
> +      */
> +     if (rte_pktmbuf_tailroom(pkt) > sizeof(*shinfo))

How about "rte_pktmbuf_tailroom(pkt) >= sizeof(*shinfo)"?

> +             shinfo = rte_pktmbuf_mtod(pkt,
> +                                       struct rte_mbuf_ext_shared_info *);
> +     else
> +             buf_len += sizeof(*shinfo);

The space taken by rte_pktmbuf_ext_shinfo_init_helper() may
be bigger than this depending on buf_end.

https://github.com/DPDK/dpdk/blob/cc8627bc6d7a7bd3ccc2653b746aac4fbaa0bc50/lib/librte_mbuf/rte_mbuf.h#L1585-L1589

> +
> +     if (unlikely(buf_len + size + RTE_PKTMBUF_HEADROOM > UINT16_MAX)) {
> +             RTE_LOG(ERR, VHOST_DATA,

We may have fallback available when extbuf alloc fails,
logging this as error may be too noisy.

> +                     "buffer size %d exceeded maximum.\n", buf_len);
> +             return -ENOSPC;
> +     }
> +
> +     buf_len += size + RTE_PKTMBUF_HEADROOM;
> +     buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE);
> +     if (unlikely(buf == NULL))
> +             return -ENOMEM;
> +
> +     /* initialize shinfo */
> +     if (shinfo) {
> +             shinfo->free_cb = virtio_dev_extbuf_free;
> +             shinfo->fcb_opaque = buf;
> +             rte_mbuf_ext_refcnt_set(shinfo, 1);
> +     } else {
> +             shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
> +                                           virtio_dev_extbuf_free, buf);
> +             if (unlikely(shinfo == NULL)) {
> +                     RTE_LOG(ERR, VHOST_DATA, "Failed to init shinfo\n");
> +                     return -1;

buf should be freed.


> @@ -1343,26 +1429,23 @@ virtio_dev_tx_split(struct virtio_net *dev, struct 
> vhost_virtqueue *vq,
>       for (i = 0; i < count; i++) {
>               struct buf_vector buf_vec[BUF_VECTOR_MAX];
>               uint16_t head_idx;
> -             uint32_t dummy_len;
> +             uint32_t buf_len;
>               uint16_t nr_vec = 0;
>               int err;
>  
>               if (unlikely(fill_vec_buf_split(dev, vq,
>                                               vq->last_avail_idx + i,
>                                               &nr_vec, buf_vec,
> -                                             &head_idx, &dummy_len,
> +                                             &head_idx, &buf_len,
>                                               VHOST_ACCESS_RO) < 0))
>                       break;
>  
>               if (likely(dev->dequeue_zero_copy == 0))
>                       update_shadow_used_ring_split(vq, head_idx, 0);
>  
> -             pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
> -             if (unlikely(pkts[i] == NULL)) {
> -                     RTE_LOG(ERR, VHOST_DATA,
> -                             "Failed to allocate memory for mbuf.\n");
> +             pkts[i] = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, buf_len);

The len in descriptor set by guests is 32bit, we may not
want to trim it to 16bit unconditionally like this.

> +             if (unlikely(pkts[i] == NULL))
>                       break;
> -             }
>  
>               err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
>                               mbuf_pool);
> @@ -1451,14 +1534,14 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct 
> vhost_virtqueue *vq,
>       for (i = 0; i < count; i++) {
>               struct buf_vector buf_vec[BUF_VECTOR_MAX];
>               uint16_t buf_id;
> -             uint32_t dummy_len;
> +             uint32_t buf_len;
>               uint16_t desc_count, nr_vec = 0;
>               int err;
>  
>               if (unlikely(fill_vec_buf_packed(dev, vq,
>                                               vq->last_avail_idx, &desc_count,
>                                               buf_vec, &nr_vec,
> -                                             &buf_id, &dummy_len,
> +                                             &buf_id, &buf_len,
>                                               VHOST_ACCESS_RO) < 0))
>                       break;
>  
> @@ -1466,12 +1549,9 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct 
> vhost_virtqueue *vq,
>                       update_shadow_used_ring_packed(vq, buf_id, 0,
>                                       desc_count);
>  
> -             pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
> -             if (unlikely(pkts[i] == NULL)) {
> -                     RTE_LOG(ERR, VHOST_DATA,
> -                             "Failed to allocate memory for mbuf.\n");
> +             pkts[i] = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, buf_len);

Ditto.

> +             if (unlikely(pkts[i] == NULL))
>                       break;
> -             }
>  
>               err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
>                               mbuf_pool);
> -- 
> 2.20.1
> 

Reply via email to