Hi Patrick,

> -----Original Message-----
> From: Fu, Patrick <patrick...@intel.com>
> Sent: Monday, July 27, 2020 2:33 PM
> To: dev@dpdk.org; maxime.coque...@redhat.com; Xia, Chenbo <chenbo....@intel.com>
> Cc: Fu, Patrick <patrick...@intel.com>
> Subject: [PATCH v3] vhost: fix async copy fail on multi-page buffers
>
> From: Patrick Fu <patrick...@intel.com>
>
> Async copy fails when a single ring buffer vector is split across multiple
> physical pages. This happens because the current hpa address translation
> function doesn't handle multi-page buffers. A new gpa to hpa address
> conversion function, which returns the hpa of the first host page hit, is
> implemented in this patch. The async data path repeatedly calls this new
> function to construct a multi-segment async copy descriptor for ring
> buffers crossing physical page boundaries.
>
> Fixes: cd6760da1076 ("vhost: introduce async enqueue for split ring")
>
> Signed-off-by: Patrick Fu <patrick...@intel.com>
> ---
> v2:
>  - change commit message and title
>  - v1 patch used CPU to copy multi-page buffers; v2 patch split the copy
>    into multiple async copy segments whenever possible
>
> v3:
>  - added fixline
>
>  lib/librte_vhost/vhost.h      | 50 +++++++++++++++++++++++++++++++++++
>  lib/librte_vhost/virtio_net.c | 40 +++++++++++++++++-----------
>  2 files changed, 75 insertions(+), 15 deletions(-)
>
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 0f7212f88..05c202a57 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -616,6 +616,56 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
>  	return 0;
>  }
>
> +static __rte_always_inline rte_iova_t
> +gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa,
> +	uint64_t gpa_size, uint64_t *hpa_size)
> +{
> +	uint32_t i;
> +	struct guest_page *page;
> +	struct guest_page key;
> +
> +	*hpa_size = gpa_size;
> +	if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
> +		key.guest_phys_addr = gpa & ~(dev->guest_pages[0].size - 1);
> +		page = bsearch(&key, dev->guest_pages, dev->nr_guest_pages,
> +			sizeof(struct guest_page), guest_page_addrcmp);
> +		if (page) {
> +			if (gpa + gpa_size <=
> +					page->guest_phys_addr + page->size) {
> +				return gpa - page->guest_phys_addr +
> +					page->host_phys_addr;
> +			} else if (gpa < page->guest_phys_addr +
> +					page->size) {
> +				*hpa_size = page->guest_phys_addr +
> +					page->size - gpa;
> +				return gpa - page->guest_phys_addr +
> +					page->host_phys_addr;
> +			}
> +		}
> +	} else {
> +		for (i = 0; i < dev->nr_guest_pages; i++) {
> +			page = &dev->guest_pages[i];
> +
> +			if (gpa >= page->guest_phys_addr) {
> +				if (gpa + gpa_size <

Should the '<' be '<=' here?

> +					page->guest_phys_addr + page->size) {
> +					return gpa - page->guest_phys_addr +
> +						page->host_phys_addr;
> +				} else if (gpa < page->guest_phys_addr +
> +						page->size) {
> +					*hpa_size = page->guest_phys_addr +
> +						page->size - gpa;
> +					return gpa - page->guest_phys_addr +
> +						page->host_phys_addr;
> +				}
> +			}
> +		}
> +	}
> +
> +	*hpa_size = 0;
> +	return 0;
> +}
> +
>  static __rte_always_inline uint64_t
>  hva_to_gpa(struct virtio_net *dev, uint64_t vva, uint64_t len)
>  {
>
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index 95a0bc19f..124a33a10 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -980,6 +980,7 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  	struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
>  	struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
>  	int error = 0;
> +	uint64_t mapped_len;
>
>  	uint32_t tlen = 0;
>  	int tvec_idx = 0;
> @@ -1072,24 +1073,31 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>
>  		cpy_len = RTE_MIN(buf_avail, mbuf_avail);
>
> -		if (unlikely(cpy_len >= cpy_threshold)) {
> -			hpa = (void *)(uintptr_t)gpa_to_hpa(dev,
> -				buf_iova + buf_offset, cpy_len);
> +		while (unlikely(cpy_len && cpy_len >= cpy_threshold)) {
> +			hpa = (void *)(uintptr_t)gpa_to_first_hpa(dev,
> +					buf_iova + buf_offset,
> +					cpy_len, &mapped_len);
>
> -			if (unlikely(!hpa)) {
> -				error = -1;
> -				goto out;
> -			}
> +			if (unlikely(!hpa || mapped_len < cpy_threshold))
> +				break;
>
>  			async_fill_vec(src_iovec + tvec_idx,
>  				(void *)(uintptr_t)rte_pktmbuf_iova_offset(m,
> -				mbuf_offset), cpy_len);
> +				mbuf_offset), (size_t)mapped_len);
>
> -			async_fill_vec(dst_iovec + tvec_idx, hpa, cpy_len);
> +			async_fill_vec(dst_iovec + tvec_idx,
> +				hpa, (size_t)mapped_len);
>
> -			tlen += cpy_len;
> +			tlen += (uint32_t)mapped_len;
> +			cpy_len -= (uint32_t)mapped_len;
> +			mbuf_avail -= (uint32_t)mapped_len;
> +			mbuf_offset += (uint32_t)mapped_len;
> +			buf_avail -= (uint32_t)mapped_len;
> +			buf_offset += (uint32_t)mapped_len;

Will it be OK if we just truncate the uint64_t to uint32_t here? What if
mapped_len > UINT32_MAX?

Thanks!
Chenbo

>  			tvec_idx++;
> -		} else {
> +		}
> +
> +		if (likely(cpy_len)) {
>  			if (unlikely(vq->batch_copy_nb_elems >= vq->size)) {
>  				rte_memcpy(
>  				(void *)((uintptr_t)(buf_addr + buf_offset)),
> @@ -1112,10 +1120,12 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  			}
>  		}
>
> -		mbuf_avail -= cpy_len;
> -		mbuf_offset += cpy_len;
> -		buf_avail -= cpy_len;
> -		buf_offset += cpy_len;
> +		if (cpy_len) {
> +			mbuf_avail -= cpy_len;
> +			mbuf_offset += cpy_len;
> +			buf_avail -= cpy_len;
> +			buf_offset += cpy_len;
> +		}
>  	}
>
>  out:
> --
> 2.18.4
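
For readers following the thread, here is a minimal, self-contained sketch of
the splitting idea the commit message describes. It is not the vhost code: the
names toy_page and toy_first_hpa are hypothetical stand-ins for struct
guest_page and gpa_to_first_hpa, the page table is a toy two-entry array, and
the iovec fill is replaced by a printf. It only illustrates how a copy range
that crosses a page boundary is broken into one segment per host page.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Toy page table entry; hypothetical stand-in for struct guest_page. */
struct toy_page {
	uint64_t guest_phys_addr;
	uint64_t host_phys_addr;
	uint64_t size;
};

/*
 * Return the host physical address of the first page hit by
 * [gpa, gpa + len) and report in *mapped how many bytes of that
 * range stay on this page; 0 means the range is not mapped.
 */
static uint64_t
toy_first_hpa(const struct toy_page *pages, int nr_pages,
		uint64_t gpa, uint64_t len, uint64_t *mapped)
{
	int i;

	for (i = 0; i < nr_pages; i++) {
		const struct toy_page *p = &pages[i];

		if (gpa >= p->guest_phys_addr &&
				gpa < p->guest_phys_addr + p->size) {
			uint64_t left = p->guest_phys_addr + p->size - gpa;

			*mapped = len <= left ? len : left;
			return gpa - p->guest_phys_addr + p->host_phys_addr;
		}
	}

	*mapped = 0;
	return 0;
}

int main(void)
{
	/* Two 4K guest pages that are not contiguous on the host. */
	struct toy_page pages[] = {
		{ 0x10000, 0xa0000, 0x1000 },
		{ 0x11000, 0xf0000, 0x1000 },
	};
	uint64_t gpa = 0x10f00;	/* starts 256 bytes before the boundary */
	uint64_t len = 1024;	/* so the copy spans both pages */

	/* Split the copy into one segment per host page, the way the
	 * async data path builds its source/destination iovec entries. */
	while (len > 0) {
		uint64_t mapped;
		uint64_t hpa = toy_first_hpa(pages, 2, gpa, len, &mapped);

		if (hpa == 0 || mapped == 0)
			break;	/* unmapped range: give up / fall back */

		printf("segment: hpa=0x%" PRIx64 " len=%" PRIu64 "\n",
			hpa, mapped);
		gpa += mapped;
		len -= mapped;
	}

	return 0;
}

Run against the toy table above, this prints a 256-byte segment on the first
host page followed by a 768-byte segment on the second, which is the behavior
the patch adds for buffers crossing physical page boundaries.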