Hi Patrick,

> -----Original Message-----
> From: Fu, Patrick <patrick...@intel.com>
> Sent: Monday, July 27, 2020 2:33 PM
> To: dev@dpdk.org; maxime.coque...@redhat.com; Xia, Chenbo <chenbo....@intel.com>
> Cc: Fu, Patrick <patrick...@intel.com>
> Subject: [PATCH v3] vhost: fix async copy fail on multi-page buffers
>
> From: Patrick Fu <patrick...@intel.com>
>
> Async copy fails when a single ring buffer vector is split across multiple
> physical pages. This happens because the current hpa address translation
> function doesn't handle multi-page buffers. A new gpa to hpa address
> conversion function, which returns the hpa of the first host page hit, is
> implemented in this patch. The async data path repeatedly calls this new
> function to construct a multi-segment async copy descriptor for ring
> buffers crossing physical page boundaries.
>
> Fixes: cd6760da1076 ("vhost: introduce async enqueue for split ring")
>
> Signed-off-by: Patrick Fu <patrick...@intel.com>
> ---
> v2:
>  - change commit message and title
>  - v1 patch used CPU to copy multi-page buffers; v2 patch split the copy
>    into multiple async copy segments whenever possible
>
> v3:
>  - added fixline
>
>  lib/librte_vhost/vhost.h      | 50 +++++++++++++++++++++++++++++++++++
>  lib/librte_vhost/virtio_net.c | 40 +++++++++++++++++-----------
>  2 files changed, 75 insertions(+), 15 deletions(-)
>
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 0f7212f88..05c202a57 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -616,6 +616,56 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
>  	return 0;
>  }
>
> +static __rte_always_inline rte_iova_t
> +gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa,
> +	uint64_t gpa_size, uint64_t *hpa_size)
> +{
> +	uint32_t i;
> +	struct guest_page *page;
> +	struct guest_page key;
> +
> +	*hpa_size = gpa_size;
> +	if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
> +		key.guest_phys_addr = gpa & ~(dev->guest_pages[0].size - 1);
> +		page = bsearch(&key, dev->guest_pages, dev->nr_guest_pages,
> +			sizeof(struct guest_page), guest_page_addrcmp);
> +		if (page) {
> +			if (gpa + gpa_size <=
> +					page->guest_phys_addr + page->size) {
> +				return gpa - page->guest_phys_addr +
> +					page->host_phys_addr;
> +			} else if (gpa < page->guest_phys_addr +
> +					page->size) {
> +				*hpa_size = page->guest_phys_addr +
> +					page->size - gpa;
> +				return gpa - page->guest_phys_addr +
> +					page->host_phys_addr;
> +			}
> +		}
> +	} else {
> +		for (i = 0; i < dev->nr_guest_pages; i++) {
> +			page = &dev->guest_pages[i];
> +
> +			if (gpa >= page->guest_phys_addr) {
> +				if (gpa + gpa_size <

Should the '<' be '<=' here?

> +					page->guest_phys_addr + page->size) {
> +					return gpa - page->guest_phys_addr +
> +						page->host_phys_addr;
> +				} else if (gpa < page->guest_phys_addr +
> +						page->size) {
> +					*hpa_size = page->guest_phys_addr +
> +						page->size - gpa;
> +					return gpa - page->guest_phys_addr +
> +						page->host_phys_addr;
> +				}
> +			}
> +		}
> +	}
> +
> +	*hpa_size = 0;
> +	return 0;
> +}
> +
>  static __rte_always_inline uint64_t
>  hva_to_gpa(struct virtio_net *dev, uint64_t vva, uint64_t len)
>  {
>
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index 95a0bc19f..124a33a10 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -980,6 +980,7 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  	struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
>  	struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
>  	int error = 0;
> +	uint64_t mapped_len;
>
>  	uint32_t tlen = 0;
>  	int tvec_idx = 0;
> @@ -1072,24 +1073,31 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>
>  		cpy_len = RTE_MIN(buf_avail, mbuf_avail);
>
> -		if (unlikely(cpy_len >= cpy_threshold)) {
> -			hpa = (void *)(uintptr_t)gpa_to_hpa(dev,
> -				buf_iova + buf_offset, cpy_len);
> +		while (unlikely(cpy_len && cpy_len >= cpy_threshold)) {
> +			hpa = (void *)(uintptr_t)gpa_to_first_hpa(dev,
> +					buf_iova + buf_offset,
> +					cpy_len, &mapped_len);
>
> -			if (unlikely(!hpa)) {
> -				error = -1;
> -				goto out;
> -			}
> +			if (unlikely(!hpa || mapped_len < cpy_threshold))
> +				break;
>
>  			async_fill_vec(src_iovec + tvec_idx,
>  				(void *)(uintptr_t)rte_pktmbuf_iova_offset(m,
> -				mbuf_offset), cpy_len);
> +				mbuf_offset), (size_t)mapped_len);
>
> -			async_fill_vec(dst_iovec + tvec_idx, hpa, cpy_len);
> +			async_fill_vec(dst_iovec + tvec_idx,
> +				hpa, (size_t)mapped_len);
>
> -			tlen += cpy_len;
> +			tlen += (uint32_t)mapped_len;
> +			cpy_len -= (uint32_t)mapped_len;
> +			mbuf_avail -= (uint32_t)mapped_len;
> +			mbuf_offset += (uint32_t)mapped_len;
> +			buf_avail -= (uint32_t)mapped_len;
> +			buf_offset += (uint32_t)mapped_len;

Will it be OK if we just truncate the uint64_t to uint32_t here? What if
mapped_len > UINT32_MAX?

Thanks!
Chenbo

>  			tvec_idx++;
> -		} else {
> +		}
> +
> +		if (likely(cpy_len)) {
>  			if (unlikely(vq->batch_copy_nb_elems >= vq->size)) {
>  				rte_memcpy(
>  				(void *)((uintptr_t)(buf_addr + buf_offset)),
> @@ -1112,10 +1120,12 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  			}
>  		}
>
> -		mbuf_avail -= cpy_len;
> -		mbuf_offset += cpy_len;
> -		buf_avail -= cpy_len;
> -		buf_offset += cpy_len;
> +		if (cpy_len) {
> +			mbuf_avail -= cpy_len;
> +			mbuf_offset += cpy_len;
> +			buf_avail -= cpy_len;
> +			buf_offset += cpy_len;
> +		}
>  	}
>
>  out:
> --
> 2.18.4
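
For readers following the thread, here is a minimal, self-contained sketch of
the splitting idea the commit message describes. It is not the vhost code: the
names toy_page and toy_first_hpa are hypothetical stand-ins for struct
guest_page and gpa_to_first_hpa, the page table is a toy two-entry array, and
the iovec fill is replaced by a printf. It only illustrates how a copy range
that crosses a page boundary is broken into one segment per host page.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Toy page table entry; hypothetical stand-in for struct guest_page. */
struct toy_page {
	uint64_t guest_phys_addr;
	uint64_t host_phys_addr;
	uint64_t size;
};

/*
 * Return the host physical address of the first page hit by
 * [gpa, gpa + len) and report in *mapped how many bytes of that
 * range stay on this page; 0 means the range is not mapped.
 */
static uint64_t
toy_first_hpa(const struct toy_page *pages, int nr_pages,
		uint64_t gpa, uint64_t len, uint64_t *mapped)
{
	int i;

	for (i = 0; i < nr_pages; i++) {
		const struct toy_page *p = &pages[i];

		if (gpa >= p->guest_phys_addr &&
				gpa < p->guest_phys_addr + p->size) {
			uint64_t left = p->guest_phys_addr + p->size - gpa;

			*mapped = len <= left ? len : left;
			return gpa - p->guest_phys_addr + p->host_phys_addr;
		}
	}

	*mapped = 0;
	return 0;
}

int main(void)
{
	/* Two 4K guest pages that are not contiguous on the host. */
	struct toy_page pages[] = {
		{ 0x10000, 0xa0000, 0x1000 },
		{ 0x11000, 0xf0000, 0x1000 },
	};
	uint64_t gpa = 0x10f00;	/* starts 256 bytes before the boundary */
	uint64_t len = 1024;	/* so the copy spans both pages */

	/* Split the copy into one segment per host page, the way the
	 * async data path builds its source/destination iovec entries. */
	while (len > 0) {
		uint64_t mapped;
		uint64_t hpa = toy_first_hpa(pages, 2, gpa, len, &mapped);

		if (hpa == 0 || mapped == 0)
			break;	/* unmapped range: give up / fall back */

		printf("segment: hpa=0x%" PRIx64 " len=%" PRIu64 "\n",
			hpa, mapped);
		gpa += mapped;
		len -= mapped;
	}

	return 0;
}

Run against the toy table above, this prints a 256-byte segment on the first
host page followed by a 768-byte segment on the second, which is the behavior
the patch adds for buffers crossing physical page boundaries.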