On 7/28/20 5:28 AM, patrick...@intel.com wrote:
> From: Patrick Fu <patrick...@intel.com>
>
> Async copy fails when a single ring buffer vector is split across
> multiple physical pages, because the current hpa address translation
> function doesn't handle multi-page buffers. This patch implements a
> new gpa to hpa conversion function which returns the hpa and the
> mapped length within the first host page hit. The async data path
> calls this new function iteratively to construct a multi-segment
> async copy descriptor for ring buffers crossing physical page
> boundaries.
>
> Fixes: cd6760da1076 ("vhost: introduce async enqueue for split ring")
>
> Signed-off-by: Patrick Fu <patrick...@intel.com>
> ---
> v2:
> - change commit message and title
> - v1 patch used CPU to copy multi-page buffers; v2 patch split the
> copy into multiple async copy segments whenever possible
>
> v3:
> - added fixline
>
> v4:
> - fix missed translation of a gpa whose length equals the host
> page size
>
> lib/librte_vhost/vhost.h | 50 +++++++++++++++++++++++++++++++++++
> lib/librte_vhost/virtio_net.c | 40 +++++++++++++++++-----------
> 2 files changed, 75 insertions(+), 15 deletions(-)
>
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 0f7212f88..05c202a57 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -616,6 +616,56 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
>  	return 0;
>  }
>
> +static __rte_always_inline rte_iova_t
> +gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa,
> +	uint64_t gpa_size, uint64_t *hpa_size)
> +{
> +	uint32_t i;
> +	struct guest_page *page;
> +	struct guest_page key;
> +
> +	*hpa_size = gpa_size;
> +	if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
> +		key.guest_phys_addr = gpa & ~(dev->guest_pages[0].size - 1);
> +		page = bsearch(&key, dev->guest_pages, dev->nr_guest_pages,
> +			sizeof(struct guest_page), guest_page_addrcmp);
> +		if (page) {
> +			if (gpa + gpa_size <=
> +					page->guest_phys_addr + page->size) {
> +				return gpa - page->guest_phys_addr +
> +					page->host_phys_addr;
> +			} else if (gpa < page->guest_phys_addr +
> +					page->size) {
> +				*hpa_size = page->guest_phys_addr +
> +					page->size - gpa;
> +				return gpa - page->guest_phys_addr +
> +					page->host_phys_addr;
> +			}
> +		}
> +	} else {
> +		for (i = 0; i < dev->nr_guest_pages; i++) {
> +			page = &dev->guest_pages[i];
> +
> +			if (gpa >= page->guest_phys_addr) {
> +				if (gpa + gpa_size <=
> +					page->guest_phys_addr + page->size) {
> +					return gpa - page->guest_phys_addr +
> +						page->host_phys_addr;
> +				} else if (gpa < page->guest_phys_addr +
> +						page->size) {
> +					*hpa_size = page->guest_phys_addr +
> +						page->size - gpa;
> +					return gpa - page->guest_phys_addr +
> +						page->host_phys_addr;
> +				}
> +			}
> +		}
> +	}
> +
> +	*hpa_size = 0;
> +	return 0;
> +}
> +
>  static __rte_always_inline uint64_t
>  hva_to_gpa(struct virtio_net *dev, uint64_t vva, uint64_t len)
>  {
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index 95a0bc19f..124a33a10 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -980,6 +980,7 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  	struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
>  	struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
>  	int error = 0;
> +	uint64_t mapped_len;
>
>  	uint32_t tlen = 0;
>  	int tvec_idx = 0;
> @@ -1072,24 +1073,31 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>
>  		cpy_len = RTE_MIN(buf_avail, mbuf_avail);
>
> -		if (unlikely(cpy_len >= cpy_threshold)) {
> -			hpa = (void *)(uintptr_t)gpa_to_hpa(dev,
> -					buf_iova + buf_offset, cpy_len);
> +		while (unlikely(cpy_len && cpy_len >= cpy_threshold)) {
> +			hpa = (void *)(uintptr_t)gpa_to_first_hpa(dev,
> +					buf_iova + buf_offset,
> +					cpy_len, &mapped_len);
>
> -			if (unlikely(!hpa)) {
> -				error = -1;
> -				goto out;
> -			}
> +			if (unlikely(!hpa || mapped_len < cpy_threshold))
> +				break;
>
>  			async_fill_vec(src_iovec + tvec_idx,
>  				(void *)(uintptr_t)rte_pktmbuf_iova_offset(m,
> -				mbuf_offset), cpy_len);
> +				mbuf_offset), (size_t)mapped_len);
>
> -			async_fill_vec(dst_iovec + tvec_idx, hpa, cpy_len);
> +			async_fill_vec(dst_iovec + tvec_idx,
> +				hpa, (size_t)mapped_len);
>
> -			tlen += cpy_len;
> +			tlen += (uint32_t)mapped_len;
> +			cpy_len -= (uint32_t)mapped_len;
> +			mbuf_avail -= (uint32_t)mapped_len;
> +			mbuf_offset += (uint32_t)mapped_len;
> +			buf_avail -= (uint32_t)mapped_len;
> +			buf_offset += (uint32_t)mapped_len;
>  			tvec_idx++;
> -		} else {
> +		}
> +
> +		if (likely(cpy_len)) {
>  			if (unlikely(vq->batch_copy_nb_elems >= vq->size)) {
>  				rte_memcpy(
>  				(void *)((uintptr_t)(buf_addr + buf_offset)),
> @@ -1112,10 +1120,12 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  			}
>  		}
>
> -		mbuf_avail -= cpy_len;
> -		mbuf_offset += cpy_len;
> -		buf_avail -= cpy_len;
> -		buf_offset += cpy_len;
> +		if (cpy_len) {
> +			mbuf_avail -= cpy_len;
> +			mbuf_offset += cpy_len;
> +			buf_avail -= cpy_len;
> +			buf_offset += cpy_len;
> +		}
Is it really necessary to check that the copy length is not 0?

Thanks,
Maxime
>  	}
>
>  out:
>
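For anyone following the thread, here is a minimal self-contained sketch
(not the patch code itself) of the splitting logic as I read it: a
first-page lookup that returns the hpa plus the length contiguously
mapped within that page, and a caller that walks a guest buffer crossing
a page boundary to build the copy segments. The page_map struct, the
first_hpa() helper and the example addresses are made up for
illustration; only the semantics are meant to mirror gpa_to_first_hpa()
and the new while loop in async_mbuf_to_desc().

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* toy stand-in for dev->guest_pages[]: one entry per guest page */
struct page_map {
	uint64_t gpa;	/* guest physical start */
	uint64_t hpa;	/* host physical start  */
	uint64_t size;	/* page size            */
};

/*
 * Return the hpa of the page containing 'gpa' and clamp '*len' to what
 * is contiguously mapped within that page; return 0 and set '*len' to 0
 * on a miss. Mirrors the semantics of gpa_to_first_hpa().
 */
static uint64_t
first_hpa(const struct page_map *map, int n, uint64_t gpa, uint64_t *len)
{
	for (int i = 0; i < n; i++) {
		const struct page_map *p = &map[i];

		if (gpa >= p->gpa && gpa < p->gpa + p->size) {
			uint64_t avail = p->gpa + p->size - gpa;

			if (*len > avail)
				*len = avail;
			return gpa - p->gpa + p->hpa;
		}
	}

	*len = 0;
	return 0;
}

int main(void)
{
	/* two 4K guest pages backed by non-contiguous host pages */
	struct page_map map[] = {
		{ 0x10000, 0x70000, 0x1000 },
		{ 0x11000, 0x90000, 0x1000 },
	};
	uint64_t gpa = 0x10f00;		/* 0x100 bytes before the page boundary */
	uint64_t remain = 0x300;	/* buffer crosses into the next page */

	while (remain) {
		uint64_t seg = remain;
		uint64_t hpa = first_hpa(map, 2, gpa, &seg);

		if (!hpa)
			break;	/* unmapped: the patch falls back to the CPU copy path */
		printf("segment: hpa=0x%" PRIx64 " len=0x%" PRIx64 "\n",
			hpa, seg);
		gpa += seg;
		remain -= seg;
	}
	return 0;
}

With the mapping above, the 0x300-byte buffer at gpa 0x10f00 comes out
as two segments (0x100 bytes finishing the first page, 0x200 bytes at
the start of the second), which is what lets the async data path handle
ring buffer vectors that straddle a physical page boundary instead of
failing the translation.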