In the async data path, if the length of a data buffer does not
exceed 256 bytes, the data is copied by the CPU instead of the
DMA device, since offloading such a small copy is typically not
worth the DMA enqueue and completion overhead.

Signed-off-by: Wenwu Ma <wenwux...@intel.com>
---
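Each scatter-gather segment is dispatched by length: a segment longer
than CPU_COPY_THRESHOLD_LEN is still enqueued to the DMA vchannel with
rte_dma_copy() through the iovec's IOVA pair, while a smaller segment
is copied immediately with rte_memcpy() through the newly stored
virtual-address pair, and a packet whose segments were all CPU-copied
has its pkts_cmpl_flag set on the spot. The standalone sketch below
only illustrates that dispatch; struct seg, dma_enqueue() and
transfer_segs() are illustrative stand-ins (plain memcpy() for
rte_memcpy(), dma_enqueue() for rte_dma_copy()), not the vhost code.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CPU_COPY_THRESHOLD_LEN 256

/* One copy segment, carrying both address views like struct vhost_iovec. */
struct seg {
	uint64_t src_io_addr;	/* IOVA, consumed by the DMA engine */
	uint64_t dst_io_addr;
	void *src_virt_addr;	/* host virtual address, consumed by the CPU */
	void *dst_virt_addr;
	size_t len;
};

/* Stand-in for rte_dma_copy(): pretend every enqueue lands in a new slot. */
static int
dma_enqueue(uint64_t src_iova, uint64_t dst_iova, size_t len)
{
	static int slot;

	(void)src_iova;
	(void)dst_iova;
	(void)len;
	return slot++;
}

/*
 * Dispatch every segment of one packet. Returns -1 on a DMA enqueue
 * failure, 0 when every segment was copied synchronously (so the caller
 * can mark the packet complete right away), otherwise nr_segs.
 */
static int
transfer_segs(const struct seg *segs, uint16_t nr_segs)
{
	bool all_cpu = true;
	uint16_t i;

	for (i = 0; i < nr_segs; i++) {
		if (segs[i].len > CPU_COPY_THRESHOLD_LEN) {
			/* Large segment: keep using the async DMA channel. */
			if (dma_enqueue(segs[i].src_io_addr,
					segs[i].dst_io_addr, segs[i].len) < 0)
				return -1;
			all_cpu = false;
		} else {
			/* Small segment: a CPU copy finishes immediately. */
			memcpy(segs[i].dst_virt_addr, segs[i].src_virt_addr,
					segs[i].len);
		}
	}

	return all_cpu ? 0 : nr_segs;
}

int
main(void)
{
	static char src[64] = "small payload", dst[64];
	struct seg s = {
		.src_virt_addr = src, .dst_virt_addr = dst, .len = sizeof(src),
	};

	/* 64 bytes <= 256 bytes, so this takes the memcpy() path. */
	printf("DMA segments: %d, dst: %s\n", transfer_segs(&s, 1), dst);
	return 0;
}

In the real patch both address views of each buffer are recorded by
async_fill_seg(), so vhost_async_dma_transfer_one() can take either
path per segment at transfer time without re-translating addresses.
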
 lib/vhost/vhost.h      |  6 ++-
 lib/vhost/virtio_net.c | 96 ++++++++++++++++++++++++++++++------------
 2 files changed, 73 insertions(+), 29 deletions(-)

diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index 9209558465..d0da53aa46 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -125,8 +125,10 @@ struct vring_used_elem_packed {
  * iovec
  */
 struct vhost_iovec {
-       void *src_addr;
-       void *dst_addr;
+       void *src_io_addr;
+       void *dst_io_addr;
+       void *src_virt_addr;
+       void *dst_virt_addr;
        size_t len;
 };
 
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 3085905d17..46f35ac05f 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -26,6 +26,8 @@
 
 #define MAX_BATCH_LEN 256
 
+#define CPU_COPY_THRESHOLD_LEN 256
+
 /* DMA device copy operation tracking array. */
 struct async_dma_info dma_copy_track[RTE_DMADEV_DEFAULT_MAX];
 
@@ -61,28 +63,37 @@ vhost_async_dma_transfer_one(struct virtio_net *dev, struct vhost_virtqueue *vq,
        int copy_idx = 0;
        uint32_t nr_segs = pkt->nr_segs;
        uint16_t i;
+       bool is_cpu_copy = true;
 
        if (rte_dma_burst_capacity(dma_id, vchan_id) < nr_segs)
                return -1;
 
        for (i = 0; i < nr_segs; i++) {
-               copy_idx = rte_dma_copy(dma_id, vchan_id, (rte_iova_t)iov[i].src_addr,
-                               (rte_iova_t)iov[i].dst_addr, iov[i].len, RTE_DMA_OP_FLAG_LLC);
-               /**
-                * Since all memory is pinned and DMA vChannel
-                * ring has enough space, failure should be a
-                * rare case. If failure happens, it means DMA
-                * device encounters serious errors; in this
-                * case, please stop async data-path and check
-                * what has happened to DMA device.
-                */
-               if (unlikely(copy_idx < 0)) {
-                       if (!vhost_async_dma_copy_log) {
-                               VHOST_LOG_DATA(ERR, "(%s) DMA copy failed for channel %d:%u\n",
+               if (iov[i].len > CPU_COPY_THRESHOLD_LEN) {
+                       copy_idx = rte_dma_copy(dma_id, vchan_id,
+                                       (rte_iova_t)iov[i].src_io_addr,
+                                       (rte_iova_t)iov[i].dst_io_addr,
+                                       iov[i].len, RTE_DMA_OP_FLAG_LLC);
+                       /**
+                        * Since all memory is pinned and DMA vChannel
+                        * ring has enough space, failure should be a
+                        * rare case. If failure happens, it means DMA
+                        * device encounters serious errors; in this
+                        * case, please stop async data-path and check
+                        * what has happened to DMA device.
+                        */
+                       if (unlikely(copy_idx < 0)) {
+                               if (!vhost_async_dma_copy_log) {
+                                       VHOST_LOG_DATA(ERR,
+                                               "(%s) DMA copy failed for channel %d:%u\n",
                                                dev->ifname, dma_id, vchan_id);
-                               vhost_async_dma_copy_log = true;
+                                       vhost_async_dma_copy_log = true;
+                               }
+                               return -1;
                        }
-                       return -1;
+                       is_cpu_copy = false;
+               } else {
+                       rte_memcpy(iov[i].dst_virt_addr, iov[i].src_virt_addr, iov[i].len);
                }
        }
 
@@ -90,7 +101,13 @@ vhost_async_dma_transfer_one(struct virtio_net *dev, struct vhost_virtqueue *vq,
         * Only store packet completion flag address in the last copy's
         * slot, and other slots are set to NULL.
         */
-       dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask] = &vq->async->pkts_cmpl_flag[flag_idx];
+       if (is_cpu_copy == false) {
+               dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask] =
+                       &vq->async->pkts_cmpl_flag[flag_idx];
+       } else {
+               vq->async->pkts_cmpl_flag[flag_idx] = true;
+               nr_segs = 0;
+       }
 
        return nr_segs;
 }
@@ -123,6 +140,19 @@ vhost_async_dma_transfer(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
        rte_spinlock_unlock(&dma_info->dma_lock);
 
+       if (unlikely(ret < 0 && pkt_idx > 0)) {
+               do {
+                       head_idx = (head_idx == 0) ? vq->size : head_idx - 1;
+                       if (vq->async->pkts_cmpl_flag[head_idx] == false)
+                               break;
+
+                       pkt_idx--;
+                       vq->async->pkts_cmpl_flag[head_idx] = false;
+                       if (pkt_idx == 0)
+                               break;
+               } while (1);
+       }
+
        return pkt_idx;
 }
 
@@ -943,7 +973,7 @@ async_iter_initialize(struct virtio_net *dev, struct vhost_async *async)
 
 static __rte_always_inline int
 async_iter_add_iovec(struct virtio_net *dev, struct vhost_async *async,
-               void *src, void *dst, size_t len)
+               void *io_src, void *io_dst, void *virt_src, void *virt_dst, size_t len)
 {
        struct vhost_iov_iter *iter;
        struct vhost_iovec *iovec;
@@ -962,8 +992,10 @@ async_iter_add_iovec(struct virtio_net *dev, struct vhost_async *async,
        iter = async->iov_iter + async->iter_idx;
        iovec = async->iovec + async->iovec_idx;
 
-       iovec->src_addr = src;
-       iovec->dst_addr = dst;
+       iovec->src_io_addr = io_src;
+       iovec->dst_io_addr = io_dst;
+       iovec->src_virt_addr = virt_src;
+       iovec->dst_virt_addr = virt_dst;
        iovec->len = len;
 
        iter->nr_segs++;
@@ -999,12 +1031,13 @@ async_iter_reset(struct vhost_async *async)
 static __rte_always_inline int
 async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq,
                struct rte_mbuf *m, uint32_t mbuf_offset,
-               uint64_t buf_iova, uint32_t cpy_len, bool to_desc)
+               uint64_t buf_addr, uint64_t buf_iova, uint32_t cpy_len, bool to_desc)
 {
        struct vhost_async *async = vq->async;
        uint64_t mapped_len;
        uint32_t buf_offset = 0;
-       void *src, *dst;
+       void *io_src, *io_dst;
+       void *virt_src, *virt_dst;
        void *host_iova;
 
        while (cpy_len) {
@@ -1016,16 +1049,23 @@ async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq,
                        return -1;
                }
 
+
                if (to_desc) {
-                       src = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset);
-                       dst = host_iova;
+                       io_src = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset);
+                       io_dst = host_iova;
+
+                       virt_src = rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
+                       virt_dst = (void *)(buf_addr + buf_offset);
                } else {
-                       src = host_iova;
-                       dst = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset);
+                       io_src = host_iova;
+                       io_dst = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset);
+
+                       virt_src = (void *)(buf_addr + buf_offset);
+                       virt_dst = rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
                }
 
-               if (unlikely(async_iter_add_iovec(dev, async, src, dst,
-                                                (size_t)mapped_len)))
+               if (unlikely(async_iter_add_iovec(dev, async, io_src, io_dst,
+                                               virt_src, virt_dst, (size_t)mapped_len)))
                        return -1;
 
                cpy_len -= (uint32_t)mapped_len;
@@ -1169,6 +1209,7 @@ mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
                if (is_async) {
                        if (async_fill_seg(dev, vq, m, mbuf_offset,
+                                               buf_addr + buf_offset,
                                                buf_iova + buf_offset, cpy_len, true) < 0)
                                goto error;
                } else {
@@ -2562,6 +2603,7 @@ desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
                if (is_async) {
                        if (async_fill_seg(dev, vq, m, mbuf_offset,
+                                               buf_addr + buf_offset,
                                                buf_iova + buf_offset, cpy_len, false) < 0)
                                goto error;
                } else {
-- 
2.25.1
