This patch simplifies async data path enablement for applications:
rather than having to implement their own copy-offload logic, applications
now only pass the DMADEV ID as a parameter to enable DMA offload.

Remove the transfer_data and check_completed_copies callbacks and use the
generic DMADEV APIs to perform packet copies for the vhost async data path.
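
With this change an application enables and drives the enqueue path roughly
as sketched below (illustration only: the DMA device identified by dmadev_id
is assumed to be configured and started by the application beforehand, the
async_threshold value and the vid/queue_id/pkts/MAX_BURST identifiers are
placeholders, and error handling is omitted):

    #include <rte_vhost_async.h>

    /* Register the async channel: a config only, no callbacks. */
    struct rte_vhost_async_config cfg = { .async_threshold = 256 };
    rte_vhost_async_channel_register(vid, queue_id, cfg);

    /* Submit a burst of packets, passing the DMADEV ID directly. */
    uint32_t n_cpl = 0;
    uint16_t n_enq = rte_vhost_submit_enqueue_burst(vid, queue_id, pkts,
                            nb_pkts, comp_pkts, &n_cpl, dmadev_id);

    /* Later, reap packets whose DMA copies have completed. */
    uint16_t n_done = rte_vhost_poll_enqueue_completed(vid, queue_id,
                            done_pkts, MAX_BURST, dmadev_id);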


Signed-off-by: Sunil Pai G <sunil.pa...@intel.com>
---
 lib/vhost/meson.build       |   2 +-
 lib/vhost/rte_vhost_async.h |  55 +------
 lib/vhost/vhost.c           |  46 +++---
 lib/vhost/vhost.h           |  24 ++-
 lib/vhost/virtio_net.c      | 311 +++++++++++++++++++++++++++++++-----
 5 files changed, 316 insertions(+), 122 deletions(-)
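
The dequeue path follows the same convention and takes the DMADEV ID as an
extra argument; a usage sketch with placeholder names, error handling omitted:

    int nr_inflight = 0;
    uint16_t n_rx = rte_vhost_async_try_dequeue_burst(vid, queue_id,
                            mbuf_pool, pkts, MAX_BURST, &nr_inflight,
                            dmadev_id);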

diff --git a/lib/vhost/meson.build b/lib/vhost/meson.build
index 2d8fe0239f..bea17ed4f5 100644
--- a/lib/vhost/meson.build
+++ b/lib/vhost/meson.build
@@ -34,4 +34,4 @@ headers = files(
         'rte_vhost_async.h',
         'rte_vhost_crypto.h',
 )
-deps += ['ethdev', 'cryptodev', 'hash', 'pci']
+deps += ['ethdev', 'cryptodev', 'hash', 'pci', 'dmadev']
diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index a37588188c..0d9706d52b 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -42,47 +42,8 @@ struct rte_vhost_async_status {
 };
 
 /**
- * dma operation callbacks to be implemented by applications
+ * in-flight async packet information
  */
-struct rte_vhost_async_channel_ops {
-       /**
-        * instruct async engines to perform copies for a batch of packets
-        *
-        * @param vid
-        *  id of vhost device to perform data copies
-        * @param queue_id
-        *  queue id to perform data copies
-        * @param descs
-        *  an array of DMA transfer memory descriptors
-        * @param opaque_data
-        *  opaque data pair sending to DMA engine
-        * @param count
-        *  number of elements in the "descs" array
-        * @return
-        *  number of descs processed, negative value means error
-        */
-       int32_t (*transfer_data)(int vid, uint16_t queue_id,
-               struct rte_vhost_async_desc *descs,
-               struct rte_vhost_async_status *opaque_data,
-               uint16_t count);
-       /**
-        * check copy-completed packets from the async engine
-        * @param vid
-        *  id of vhost device to check copy completion
-        * @param queue_id
-        *  queue id to check copy completion
-        * @param opaque_data
-        *  buffer to receive the opaque data pair from DMA engine
-        * @param max_packets
-        *  max number of packets could be completed
-        * @return
-        *  number of async descs completed, negative value means error
-        */
-       int32_t (*check_completed_copies)(int vid, uint16_t queue_id,
-               struct rte_vhost_async_status *opaque_data,
-               uint16_t max_packets);
-};
-
 struct async_nethdr {
        struct virtio_net_hdr hdr;
        bool valid;
@@ -132,8 +93,7 @@ struct rte_vhost_async_config {
  */
 __rte_experimental
 int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
-       struct rte_vhost_async_config config,
-       struct rte_vhost_async_channel_ops *ops);
+       struct rte_vhost_async_config config);
 
 /**
  * Unregister an async channel for a vhost queue
@@ -168,8 +128,7 @@ int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id);
  */
 __rte_experimental
 int rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id,
-       struct rte_vhost_async_config config,
-       struct rte_vhost_async_channel_ops *ops);
+       struct rte_vhost_async_config config);
 
 /**
  * Unregister an async channel for a vhost queue without performing any
@@ -218,7 +177,7 @@ int rte_vhost_async_channel_unregister_thread_unsafe(int vid,
 __rte_experimental
 uint16_t rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,
                struct rte_mbuf **pkts, uint16_t count,
-               struct rte_mbuf **comp_pkts, uint32_t *comp_count);
+               struct rte_mbuf **comp_pkts, uint32_t *comp_count, int dmadev_id);
 
 /**
  * This function checks async completion status for a specific vhost
@@ -238,7 +197,7 @@ uint16_t rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,
  */
 __rte_experimental
 uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
-               struct rte_mbuf **pkts, uint16_t count);
+               struct rte_mbuf **pkts, uint16_t count, int dmadev_id);
 
 /**
  * This function returns the amount of in-flight packets for the vhost
@@ -274,7 +233,7 @@ int rte_vhost_async_get_inflight(int vid, uint16_t queue_id);
  */
 __rte_experimental
 uint16_t rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
-               struct rte_mbuf **pkts, uint16_t count);
+               struct rte_mbuf **pkts, uint16_t count, int dmadev_id);
 /**
  * This function tries to receive packets from the guest with offloading
  * large copies to the async channel. The packets that are transfer completed
@@ -300,6 +259,6 @@ __rte_experimental
 uint16_t
 rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
        struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
-       int *nr_inflight);
+       int *nr_inflight, int dmadev_id);
 
 #endif /* _RTE_VHOST_ASYNC_H_ */
diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index 355ff37651..3fdba5949a 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -340,6 +340,7 @@ cleanup_device(struct virtio_net *dev, int destroy)
 static void
 vhost_free_async_mem(struct vhost_virtqueue *vq)
 {
+       rte_free(vq->dma_completions);
        rte_free(vq->async_pkts_info);
 
        rte_free(vq->async_buffers_packed);
@@ -350,6 +351,7 @@ vhost_free_async_mem(struct vhost_virtqueue *vq)
        rte_free(vq->it_pool);
        rte_free(vq->vec_pool);
 
+       vq->dma_completions = NULL;
        vq->async_pkts_info = NULL;
        vq->it_pool = NULL;
        vq->vec_pool = NULL;
@@ -1621,8 +1623,7 @@ int rte_vhost_extern_callback_register(int vid,
 
 static __rte_always_inline int
 async_channel_register(int vid, uint16_t queue_id,
-               struct rte_vhost_async_config config,
-               struct rte_vhost_async_channel_ops *ops)
+               struct rte_vhost_async_config config)
 {
        struct virtio_net *dev = get_device(vid);
        struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
@@ -1691,8 +1692,17 @@ async_channel_register(int vid, uint16_t queue_id,
                }
        }
 
-       vq->async_ops.check_completed_copies = ops->check_completed_copies;
-       vq->async_ops.transfer_data = ops->transfer_data;
+       vq->dma_completions = rte_malloc_socket(NULL,
+               sizeof(struct dma_completions_t),
+               RTE_CACHE_LINE_SIZE, vq->numa_node);
+       if (!vq->dma_completions) {
+               vhost_free_async_mem(vq);
+               VHOST_LOG_CONFIG(ERR,
+                       "async register failed: cannot allocate memory for dma_completions ring "
+                       "(vid %d, qid: %d)\n", vid, queue_id);
+               return -1;
+       }
+
        vq->async_threshold = config.async_threshold;
 
        vq->async_registered = true;
@@ -1702,14 +1712,13 @@ async_channel_register(int vid, uint16_t queue_id,
 
 int
 rte_vhost_async_channel_register(int vid, uint16_t queue_id,
-               struct rte_vhost_async_config config,
-               struct rte_vhost_async_channel_ops *ops)
+               struct rte_vhost_async_config config)
 {
        struct vhost_virtqueue *vq;
        struct virtio_net *dev = get_device(vid);
        int ret;
 
-       if (dev == NULL || ops == NULL)
+       if (dev == NULL)
                return -1;
 
        if (queue_id >= VHOST_MAX_VRING)
@@ -1727,12 +1736,8 @@ rte_vhost_async_channel_register(int vid, uint16_t queue_id,
                return -1;
        }
 
-       if (unlikely(ops->check_completed_copies == NULL ||
-               ops->transfer_data == NULL))
-               return -1;
-
        rte_spinlock_lock(&vq->access_lock);
-       ret = async_channel_register(vid, queue_id, config, ops);
+       ret = async_channel_register(vid, queue_id, config);
        rte_spinlock_unlock(&vq->access_lock);
 
        return ret;
@@ -1740,13 +1745,12 @@ rte_vhost_async_channel_register(int vid, uint16_t queue_id,
 
 int
 rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id,
-               struct rte_vhost_async_config config,
-               struct rte_vhost_async_channel_ops *ops)
+               struct rte_vhost_async_config config)
 {
        struct vhost_virtqueue *vq;
        struct virtio_net *dev = get_device(vid);
 
-       if (dev == NULL || ops == NULL)
+       if (dev == NULL)
                return -1;
 
        if (queue_id >= VHOST_MAX_VRING)
@@ -1764,11 +1768,7 @@ rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id,
                return -1;
        }
 
-       if (unlikely(ops->check_completed_copies == NULL ||
-               ops->transfer_data == NULL))
-               return -1;
-
-       return async_channel_register(vid, queue_id, config, ops);
+       return async_channel_register(vid, queue_id, config);
 }
 
 int
@@ -1808,9 +1808,6 @@ rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
        }
 
        vhost_free_async_mem(vq);
-
-       vq->async_ops.transfer_data = NULL;
-       vq->async_ops.check_completed_copies = NULL;
        vq->async_registered = false;
 
 out:
@@ -1846,9 +1843,6 @@ rte_vhost_async_channel_unregister_thread_unsafe(int vid, uint16_t queue_id)
        }
 
        vhost_free_async_mem(vq);
-
-       vq->async_ops.transfer_data = NULL;
-       vq->async_ops.check_completed_copies = NULL;
        vq->async_registered = false;
 
        return 0;
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index a2309b06cd..2c996c4414 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -120,6 +120,26 @@ struct vring_used_elem_packed {
        uint32_t count;
 };
 
+
+/* vHost async DMADEV ring size. */
+#define VHOST_ASYNC_DMADEV_RING_SIZE 4096
+
+#define DMA_COMPLETION_RING_SIZE VHOST_ASYNC_DMADEV_RING_SIZE
+
+struct enq_info_t {
+    uint8_t pkt_rcvd; //Make this atomic
+};
+
+/* DMA completion tracking ring to reorder the packets.
+ * The writes to the enq_info array should be atomic
+ * to guarantee correct behaviour. */
+struct dma_completions_t {
+    struct enq_info_t enq_info[DMA_COMPLETION_RING_SIZE];
+    uint16_t count;
+    uint16_t read_idx;
+    uint16_t write_idx;
+};
+
 /**
  * Structure contains variables relevant to RX/TX virtqueues.
  */
@@ -194,9 +214,6 @@ struct vhost_virtqueue {
        struct rte_vhost_resubmit_info *resubmit_inflight;
        uint64_t                global_counter;
 
-       /* operation callbacks for async dma */
-       struct rte_vhost_async_channel_ops      async_ops;
-
        struct rte_vhost_iov_iter *it_pool;
        struct iovec *vec_pool;
 
@@ -221,6 +238,7 @@ struct vhost_virtqueue {
        /* vq async features */
        bool            async_registered;
        uint32_t        async_threshold;
+       struct dma_completions_t *dma_completions;
 
        int                     notif_enable;
 #define VIRTIO_UNINITIALIZED_NOTIF     (-1)
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index c69dc35988..5b1209bb91 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -11,6 +11,7 @@
 #include <rte_net.h>
 #include <rte_ether.h>
 #include <rte_ip.h>
+#include <rte_dmadev.h>
 #include <rte_vhost.h>
 #include <rte_tcp.h>
 #include <rte_udp.h>
@@ -1588,6 +1589,227 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
        return virtio_dev_rx(dev, queue_id, pkts, count);
 }
 
+
+/* Checks if the dma_completion ring is full. */
+static inline bool
+is_compl_ring_full(struct dma_completions_t *dma_compl)
+{
+    return dma_compl->count == DMA_COMPLETION_RING_SIZE;
+}
+
+/* Checks if the dma_completion ring is empty. */
+static inline bool
+is_compl_ring_empty(struct dma_completions_t *dma_compl)
+{
+    return dma_compl->count == 0;
+}
+
+static void *dmadev_enq_track[RTE_DMADEV_MAX_DEVS][VHOST_ASYNC_DMADEV_RING_SIZE];
+
+/* Enqueue a packet via DMA. */
+static inline void
+dmadev_enqueue_packet(const uint16_t dev_id,
+                    const struct rte_vhost_iov_iter *src_ptr,
+                    const struct rte_vhost_iov_iter *dst_ptr,
+                    const uint16_t nr_segs,
+                    struct enq_info_t *slot_addr)
+{
+    uint16_t seg_idx = 0;
+    struct enq_info_t *addr = NULL;
+    uint64_t dma_flags = RTE_DMA_OP_FLAG_LLC;
+    const uint16_t dmadev_ring_mask = VHOST_ASYNC_DMADEV_RING_SIZE-1;
+
+    while (likely(seg_idx < nr_segs)) {
+        /* Fetch DMA source start addr. */
+        const rte_iova_t s_base = (uintptr_t)(src_ptr->iov[seg_idx].iov_base);
+        const rte_iova_t dma_src_start_addr = src_ptr->offset + s_base;
+        /* Fetch DMA destination start addr. */
+        const rte_iova_t d_base = (uintptr_t)(dst_ptr->iov[seg_idx].iov_base);
+        const rte_iova_t dma_dst_start_addr = dst_ptr->offset + d_base;
+        /* Fetch packet segment length. */
+        const uint32_t dma_src_len = src_ptr->iov[seg_idx].iov_len;
+        /* Check if this segment is the last. */
+        if (seg_idx == nr_segs - 1) {
+            addr = slot_addr;
+        }
+
+        int enq_index = rte_dmadev_copy(dev_id,
+                                        0,
+                                        dma_src_start_addr,
+                                        dma_dst_start_addr,
+                                        dma_src_len,
+                                        dma_flags);
+        if (enq_index < 0)
+            break;
+        dmadev_enq_track[dev_id][enq_index & dmadev_ring_mask] = (void *)addr;
+        seg_idx++;
+    }
+}
+
+/* Enqueue a packet through SW copy. */
+static inline void
+sw_enqueue_packet(const struct rte_vhost_iov_iter *src_ptr,
+                  const struct rte_vhost_iov_iter *dst_ptr,
+                  const uint16_t nr_segs)
+{
+    uint16_t seg_idx = 0;
+
+    while (likely(seg_idx < nr_segs)) {
+        /* Fetch source start addr. */
+        const uintptr_t s_base = (uintptr_t)(src_ptr->iov[seg_idx].iov_base);
+        const uintptr_t src_start_addr = src_ptr->offset + s_base;
+        /* Fetch destination start addr. */
+        const uintptr_t d_base = (uintptr_t)(dst_ptr->iov[seg_idx].iov_base);
+        const uintptr_t dst_start_addr = dst_ptr->offset + d_base;
+        /* Fetch segment length. */
+        const size_t src_len = src_ptr->iov[seg_idx].iov_len;
+
+        rte_memcpy((void *) dst_start_addr,
+                   (void *) src_start_addr,
+                   src_len);
+        seg_idx++;
+    }
+}
+
+/* Fetch the slot address for a packet. */
+static inline struct enq_info_t *
+compl_slot_get_and_inc(struct dma_completions_t *dma_compl)
+{
+    struct enq_info_t *slot_addr
+                        = &(dma_compl->enq_info[dma_compl->write_idx]);
+    const uint16_t ring_mask = DMA_COMPLETION_RING_SIZE - 1;
+
+    dma_compl->write_idx++;
+    dma_compl->write_idx &= ring_mask;
+    dma_compl->count++;
+    return slot_addr;
+}
+
+/* Calculate packets sent for a txq by parsing dma_completion ring. */
+static inline uint32_t
+count_completed_packets(struct dma_completions_t *dma_compl,
+                        const int max_pkts)
+{
+    uint32_t pkts;
+    int count = dma_compl->count;
+    int read_idx = dma_compl->read_idx;
+    uint8_t pkt_rcvd = 0;
+    const uint16_t ring_mask = DMA_COMPLETION_RING_SIZE - 1;
+
+    for (pkts = 0; (pkts < (uint32_t)max_pkts) && (count > 0); pkts++) {
+        read_idx &= ring_mask;
+        pkt_rcvd = dma_compl->enq_info[read_idx].pkt_rcvd;
+        if (!pkt_rcvd) {
+            break;
+        }
+
+        dma_compl->enq_info[read_idx].pkt_rcvd = 0;
+        count--;
+        read_idx++;
+    }
+    dma_compl->count = count;
+    dma_compl->read_idx = read_idx;
+    return pkts;
+}
+
+/* Offload enqueue via DMA. */
+static int32_t
+dmadev_transfer_data(int dev_id,
+                     struct dma_completions_t *compl,
+                     struct rte_vhost_async_desc *descs,
+                     uint16_t count)
+{
+    uint16_t desc_idx = 0;
+    struct enq_info_t *slot_addr = NULL;
+
+    if (is_compl_ring_full(compl)) {
+        goto out;
+    }
+
+    /* Cache space left in DMA ring to avoid driver call for every packet. */
+    uint16_t dmadev_space_left = rte_dmadev_burst_capacity(dev_id);
+    const int compl_space_left = DMA_COMPLETION_RING_SIZE - compl->count;
+    if (count > compl_space_left) {
+        count = compl_space_left;
+    }
+
+    while (desc_idx < count) {
+        const struct rte_vhost_iov_iter *src_ptr = descs[desc_idx].src;
+        const struct rte_vhost_iov_iter *dst_ptr = descs[desc_idx].dst;
+        const uint16_t nr_segs = src_ptr->nr_segs;
+        if (dmadev_space_left < nr_segs) {
+            goto ring_doorbell;
+        }
+        slot_addr = compl_slot_get_and_inc(compl);
+        dmadev_enqueue_packet(dev_id, src_ptr, dst_ptr, nr_segs, slot_addr);
+        dmadev_space_left -= nr_segs;
+        desc_idx++;
+    }
+
+ring_doorbell:
+    if (desc_idx != 0) {
+        /* Ring the doorbell. */
+        rte_dmadev_submit(dev_id, 0);
+    }
+
+    /* Do software copy for packets that do not fit in the DMA ring. */
+    while (desc_idx < count) {
+        const struct rte_vhost_iov_iter *src_ptr = descs[desc_idx].src;
+        const struct rte_vhost_iov_iter *dst_ptr = descs[desc_idx].dst;
+        slot_addr = compl_slot_get_and_inc(compl);
+        sw_enqueue_packet(src_ptr, dst_ptr, src_ptr->nr_segs);
+        slot_addr->pkt_rcvd = 1;
+        desc_idx++;
+    }
+
+out:
+    return desc_idx;
+}
+
+/* Query transfer status of DMA. */
+static int32_t
+dmadev_check_completed_copies(int dev_id,
+                              struct dma_completions_t *compl,
+                              uint16_t max_pkts)
+{
+    bool error;
+    uint16_t last_idx;
+    uint32_t nr_pkts = 0;
+    struct enq_info_t *slots;
+    const uint16_t mask = VHOST_ASYNC_DMADEV_RING_SIZE-1;
+
+    if (unlikely(is_compl_ring_empty(compl))) {
+        goto out;
+    }
+
+    /* Check the completion status of DMA. */
+    const int ret_segs = rte_dmadev_completed(dev_id,
+                                              0,
+                                              MAX_PKT_BURST,
+                                              &last_idx,
+                                              &error);
+    if (unlikely(error)) {
+        return -1;
+    }
+    /* Compute the start index. */
+    uint16_t idx = (last_idx - ret_segs + 1);
+    for (int i = 0; i < ret_segs; i++) {
+        slots = (struct enq_info_t *)dmadev_enq_track[dev_id][idx & mask];
+        if (slots) {
+            /* Mark the packet slot as received.
+             * The slot could belong to another queue but writes are atomic. */
+            slots->pkt_rcvd = 1;
+        }
+        idx++;
+    }
+    /* Calculate packets successfully offloaded from this virtqueue. */
+    nr_pkts = count_completed_packets(compl, max_pkts);
+
+out:
+    return nr_pkts;
+}
+
+
 static __rte_always_inline uint16_t
 virtio_dev_rx_async_get_info_idx(uint16_t pkts_idx,
        uint16_t vq_size, uint16_t n_inflight)
@@ -1631,9 +1853,9 @@ store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring,
 
 static __rte_noinline uint32_t
 virtio_dev_rx_async_submit_split(struct virtio_net *dev,
-       struct vhost_virtqueue *vq, uint16_t queue_id,
+       struct vhost_virtqueue *vq, uint16_t queue_id __rte_unused,
        struct rte_mbuf **pkts, uint32_t count,
-       struct rte_mbuf **comp_pkts, uint32_t *comp_count)
+       struct rte_mbuf **comp_pkts, uint32_t *comp_count, int dmadev_id)
 {
        uint32_t pkt_idx = 0, pkt_burst_idx = 0;
        uint16_t num_buffers;
@@ -1732,8 +1954,8 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
                if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
                        ((VHOST_MAX_ASYNC_VEC >> 1) - segs_await <
                        BUF_VECTOR_MAX))) {
-                       n_xfer = vq->async_ops.transfer_data(dev->vid,
-                                       queue_id, tdes, 0, pkt_burst_idx);
+                       n_xfer = dmadev_transfer_data(dmadev_id, vq->dma_completions, tdes,
+                                                       pkt_burst_idx);
                        if (n_xfer >= 0) {
                                n_pkts = n_xfer;
                        } else {
@@ -1765,7 +1987,8 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
        }
 
        if (pkt_burst_idx) {
-               n_xfer = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx);
+               n_xfer = dmadev_transfer_data(dmadev_id, vq->dma_completions, tdes,
+                                       pkt_burst_idx);
                if (n_xfer >= 0) {
                        n_pkts = n_xfer;
                } else {
@@ -2013,9 +2236,9 @@ dma_error_handler_packed(struct vhost_virtqueue *vq, struct vring_packed_desc *a
 
 static __rte_noinline uint32_t
 virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
-       struct vhost_virtqueue *vq, uint16_t queue_id,
+       struct vhost_virtqueue *vq, uint16_t queue_id  __rte_unused,
        struct rte_mbuf **pkts, uint32_t count,
-       struct rte_mbuf **comp_pkts, uint32_t *comp_count)
+       struct rte_mbuf **comp_pkts, uint32_t *comp_count, int dmadev_id)
 {
        uint32_t pkt_idx = 0, pkt_burst_idx = 0;
        uint32_t remained = count;
@@ -2105,8 +2328,8 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
                 */
                if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
                        ((VHOST_MAX_ASYNC_VEC >> 1) - segs_await < BUF_VECTOR_MAX))) {
-                       n_xfer = vq->async_ops.transfer_data(dev->vid,
-                                       queue_id, tdes, 0, pkt_burst_idx);
+                       n_xfer = dmadev_transfer_data(dmadev_id, vq->dma_completions, tdes,
+                                               pkt_burst_idx);
                        if (n_xfer >= 0) {
                                n_pkts = n_xfer;
                        } else {
@@ -2137,7 +2360,9 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
        } while (pkt_idx < count);
 
        if (pkt_burst_idx) {
-               n_xfer = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx);
+               n_xfer = dmadev_transfer_data(dmadev_id, vq->dma_completions, tdes,
+                                               pkt_burst_idx);
+
                if (n_xfer >= 0) {
                        n_pkts = n_xfer;
                } else {
@@ -2225,7 +2450,7 @@ write_back_completed_descs_packed(struct vhost_virtqueue *vq,
 
 static __rte_always_inline uint16_t
 vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
-               struct rte_mbuf **pkts, uint16_t count)
+               struct rte_mbuf **pkts, uint16_t count, int dmadev_id)
 {
        struct vhost_virtqueue *vq;
        uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
@@ -2243,8 +2468,9 @@ vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
                vq_size, vq->async_pkts_inflight_n);
 
        if (count > vq->async_last_pkts_n) {
-               n_cpl = vq->async_ops.check_completed_copies(dev->vid,
-                       queue_id, 0, count - vq->async_last_pkts_n);
+               n_cpl = dmadev_check_completed_copies(dmadev_id, vq->dma_completions,
+                                                       count - vq->async_last_pkts_n);
+
                if (n_cpl >= 0) {
                        n_pkts_cpl = n_cpl;
                } else {
@@ -2306,7 +2532,7 @@ vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
 
 uint16_t
 rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
-               struct rte_mbuf **pkts, uint16_t count)
+               struct rte_mbuf **pkts, uint16_t count, int dmadev_id)
 {
        struct virtio_net *dev = get_device(vid);
        struct vhost_virtqueue *vq;
@@ -2332,7 +2558,7 @@ rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
 
        rte_spinlock_lock(&vq->access_lock);
 
-       n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
+       n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count, dmadev_id);
 
        rte_spinlock_unlock(&vq->access_lock);
 
@@ -2341,7 +2567,7 @@ rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
 
 uint16_t
 rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
-               struct rte_mbuf **pkts, uint16_t count)
+               struct rte_mbuf **pkts, uint16_t count, int dmadev_id)
 {
        struct virtio_net *dev = get_device(vid);
        struct vhost_virtqueue *vq;
@@ -2365,7 +2591,7 @@ rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
                return 0;
        }
 
-       n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
+       n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count, dmadev_id);
 
        return n_pkts_cpl;
 }
@@ -2373,7 +2599,7 @@ rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
 static __rte_always_inline uint32_t
 virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
        struct rte_mbuf **pkts, uint32_t count,
-       struct rte_mbuf **comp_pkts, uint32_t *comp_count)
+       struct rte_mbuf **comp_pkts, uint32_t *comp_count, int dmadev_id)
 {
        struct vhost_virtqueue *vq;
        uint32_t nb_tx = 0;
@@ -2406,11 +2632,11 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
        if (vq_is_packed(dev))
                nb_tx = virtio_dev_rx_async_submit_packed(dev,
                                vq, queue_id, pkts, count, comp_pkts,
-                               comp_count);
+                               comp_count, dmadev_id);
        else
                nb_tx = virtio_dev_rx_async_submit_split(dev,
                                vq, queue_id, pkts, count, comp_pkts,
-                               comp_count);
+                               comp_count, dmadev_id);
 
 out:
        if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
@@ -2425,7 +2651,7 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
 uint16_t
 rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,
                struct rte_mbuf **pkts, uint16_t count,
-               struct rte_mbuf **comp_pkts, uint32_t *comp_count)
+               struct rte_mbuf **comp_pkts, uint32_t *comp_count, int dmadev_id)
 {
        struct virtio_net *dev = get_device(vid);
 
@@ -2441,7 +2667,7 @@ rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,
        }
 
        return virtio_dev_rx_async_submit(dev, queue_id, pkts, count, comp_pkts,
-                       comp_count);
+                       comp_count, dmadev_id);
 }
 
 static inline bool
@@ -3631,9 +3857,10 @@ async_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 }
 
 static __rte_always_inline uint16_t
-async_poll_dequeue_completed_split(struct virtio_net *dev,
-               struct vhost_virtqueue *vq, uint16_t queue_id,
-               struct rte_mbuf **pkts, uint16_t count, bool legacy_ol_flags)
+async_poll_dequeue_completed_split(struct virtio_net *dev  __rte_unused,
+               struct vhost_virtqueue *vq, uint16_t queue_id  __rte_unused,
+               struct rte_mbuf **pkts, uint16_t count, bool legacy_ol_flags,
+               int dmadev_id)
 {
        uint16_t n_pkts_cpl = 0, n_pkts_put = 0;
        uint16_t start_idx, pkt_idx, from;
@@ -3646,9 +3873,9 @@ async_poll_dequeue_completed_split(struct virtio_net *dev,
 
        if (count > vq->async_last_pkts_n) {
                int ret;
+               ret = dmadev_check_completed_copies(dmadev_id, vq->dma_completions,
+                                                       count - vq->async_last_pkts_n);
 
-               ret = vq->async_ops.check_completed_copies(dev->vid, queue_id,
-                               0, count - vq->async_last_pkts_n);
                if (unlikely(ret < 0)) {
                        VHOST_LOG_DATA(ERR, "(%d) async channel poll error\n", dev->vid);
                        ret = 0;
@@ -3688,7 +3915,7 @@ static __rte_always_inline uint16_t
 virtio_dev_tx_async_split(struct virtio_net *dev,
                struct vhost_virtqueue *vq, uint16_t queue_id,
                struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
-               uint16_t count, bool legacy_ol_flags)
+               uint16_t count, bool legacy_ol_flags, int dmadev_id)
 {
        static bool allocerr_warned;
        uint16_t free_entries;
@@ -3802,16 +4029,14 @@ virtio_dev_tx_async_split(struct virtio_net *dev,
                                         iovec_idx < BUF_VECTOR_MAX))) {
                        uint16_t nr_pkts;
                        int32_t ret;
-
-                       ret = vq->async_ops.transfer_data(dev->vid, queue_id,
-                                       tdes, 0, nr_async_burst);
+                       ret = dmadev_transfer_data(dmadev_id, vq->dma_completions,
+                                                               tdes, nr_async_burst);
                        if (unlikely(ret < 0)) {
                                VHOST_LOG_DATA(ERR, "(%d) async channel submit"
                                                " error\n", dev->vid);
                                ret = 0;
                        }
                        nr_pkts = ret;
-
                        vq->async_pkts_inflight_n += nr_pkts;
                        it_idx = 0;
                        iovec_idx = 0;
@@ -3828,16 +4053,14 @@ virtio_dev_tx_async_split(struct virtio_net *dev,
        if (nr_async_burst) {
                uint16_t nr_pkts;
                int32_t ret;
-
-               ret = vq->async_ops.transfer_data(dev->vid, queue_id,
-                               tdes, 0, nr_async_burst);
+               ret = dmadev_transfer_data(dmadev_id, vq->dma_completions, tdes,
+                                                       nr_async_burst);
                if (unlikely(ret < 0)) {
                        VHOST_LOG_DATA(ERR, "(%d) async channel submit error\n",
                                        dev->vid);
                        ret = 0;
                }
                nr_pkts = ret;
-
                vq->async_pkts_inflight_n += nr_pkts;
 
                if (unlikely(nr_pkts < nr_async_burst))
@@ -3886,7 +4109,7 @@ virtio_dev_tx_async_split(struct virtio_net *dev,
        if (nr_done_pkts < count && vq->async_pkts_inflight_n > 0) {
                nr_done_pkts += async_poll_dequeue_completed_split(dev, vq,
                                        queue_id, &pkts[nr_done_pkts],
-                                       count - nr_done_pkts, legacy_ol_flags);
+                                       count - nr_done_pkts, legacy_ol_flags, dmadev_id);
        }
 
        if (likely(nr_done_pkts))
@@ -3900,10 +4123,10 @@ static uint16_t
 virtio_dev_tx_async_split_legacy(struct virtio_net *dev,
                struct vhost_virtqueue *vq, uint16_t queue_id,
                struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
-               uint16_t count)
+               uint16_t count, int dmadev_id)
 {
        return virtio_dev_tx_async_split(dev, vq, queue_id, mbuf_pool,
-                               pkts, count, true);
+                               pkts, count, true, dmadev_id);
 }
 
 __rte_noinline
@@ -3911,16 +4134,16 @@ static uint16_t
 virtio_dev_tx_async_split_compliant(struct virtio_net *dev,
                struct vhost_virtqueue *vq, uint16_t queue_id,
                struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
-               uint16_t count)
+               uint16_t count, int dmadev_id)
 {
        return virtio_dev_tx_async_split(dev, vq, queue_id, mbuf_pool,
-                               pkts, count, false);
+                               pkts, count, false, dmadev_id);
 }
 
 uint16_t
 rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
        struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
-       int *nr_inflight)
+       int *nr_inflight, int dmadev_id)
 {
        struct virtio_net *dev;
        struct rte_mbuf *rarp_mbuf = NULL;
@@ -4007,10 +4230,10 @@ rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
 
        if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
                count = virtio_dev_tx_async_split_legacy(dev, vq, queue_id,
-                               mbuf_pool, pkts, count);
+                               mbuf_pool, pkts, count, dmadev_id);
        else
                count = virtio_dev_tx_async_split_compliant(dev, vq, queue_id,
-                               mbuf_pool, pkts, count);
+                               mbuf_pool, pkts, count, dmadev_id);
 
 out:
        *nr_inflight = vq->async_pkts_inflight_n;
-- 
2.25.1
