From: Patrick <patrick...@intel.com>

This patch introduces the registration/unregistration APIs for the
async data path, together with the required data structures and the
DMA callback function prototypes.

Signed-off-by: Patrick <patrick...@intel.com>
---
 lib/librte_vhost/Makefile          |   3 +-
 lib/librte_vhost/rte_vhost.h       |   1 +
 lib/librte_vhost/rte_vhost_async.h | 134 +++++++++++++++++++++++++++++++++++++
 lib/librte_vhost/socket.c          |  20 ++++++
 lib/librte_vhost/vhost.c           |  74 +++++++++++++++++++-
 lib/librte_vhost/vhost.h           |  30 ++++++++-
 lib/librte_vhost/vhost_user.c      |  28 ++++++--
 7 files changed, 283 insertions(+), 7 deletions(-)
 create mode 100644 lib/librte_vhost/rte_vhost_async.h

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index e592795..3aed094 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -41,7 +41,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
                                        vhost_user.c virtio_net.c vdpa.c
 
 # install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h rte_vdpa.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h rte_vdpa.h \
+                                               rte_vhost_async.h
 
 # only compile vhost crypto when cryptodev is enabled
 ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index d43669f..cec4d07 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -35,6 +35,7 @@
 #define RTE_VHOST_USER_EXTBUF_SUPPORT  (1ULL << 5)
 /* support only linear buffers (no chained mbufs) */
 #define RTE_VHOST_USER_LINEARBUF_SUPPORT       (1ULL << 6)
+#define RTE_VHOST_USER_ASYNC_COPY      (1ULL << 7)
 
 /** Protocol features. */
 #ifndef VHOST_USER_PROTOCOL_F_MQ
diff --git a/lib/librte_vhost/rte_vhost_async.h b/lib/librte_vhost/rte_vhost_async.h
new file mode 100644
index 0000000..82f2ebe
--- /dev/null
+++ b/lib/librte_vhost/rte_vhost_async.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_VHOST_ASYNC_H_
+#define _RTE_VHOST_ASYNC_H_
+
+#include "rte_vhost.h"
+
+/**
+ * iovec iterator
+ */
+struct iov_it {
+       /** offset to the first byte of interesting data */
+       size_t offset;
+       /** total bytes of data in this iterator */
+       size_t count;
+       /** pointer to the iovec array */
+       struct iovec *iov;
+       /** number of iovec in this iterator */
+       unsigned long nr_segs;
+};
+
+/**
+ * dma transfer descriptor pair
+ */
+struct dma_trans_desc {
+       /** source memory iov_it */
+       struct iov_it *src;
+       /** destination memory iov_it */
+       struct iov_it *dst;
+};
+
+/**
+ * dma transfer status
+ */
+struct dma_trans_status {
+       /** An array of application-specific data for source memory */
+       uintptr_t *src_opaque_data;
+       /** An array of application-specific data for destination memory */
+       uintptr_t *dst_opaque_data;
+};
+
+/**
+ * dma operation callbacks to be implemented by applications
+ */
+struct rte_vhost_async_channel_ops {
+       /**
+        * instruct a DMA channel to perform copies for a batch of packets
+        *
+        * @param vid
+        *  id of vhost device to perform data copies
+        * @param queue_id
+        *  queue id to perform data copies
+        * @param descs
+        *  an array of DMA transfer memory descriptors
+        * @param opaque_data
+        *  opaque data pair sent to the DMA engine
+        * @param count
+        *  number of elements in the "descs" array
+        * @return
+        *  -1 on failure, number of descs processed on success
+        */
+       int (*transfer_data)(int vid, uint16_t queue_id,
+               struct dma_trans_desc *descs,
+               struct dma_trans_status *opaque_data,
+               uint16_t count);
+       /**
+        * check copy-completed packets from a DMA channel
+        * @param vid
+        *  id of vhost device to check copy completion
+        * @param queue_id
+        *  queue id to check copy completion
+        * @param opaque_data
+        *  buffer to receive the opaque data pair from DMA engine
+        * @param max_packets
+        *  max number of packets that can be completed
+        * @return
+        *  -1 on failure, number of iov segments completed on success
+        */
+       int (*check_completed_copies)(int vid, uint16_t queue_id,
+               struct dma_trans_status *opaque_data,
+               uint16_t max_packets);
+};
+
+/**
+ *  dma channel feature bit definition
+ */
+struct dma_channel_features {
+       union {
+               uint32_t intval;
+               struct {
+                       uint32_t inorder:1;
+                       uint32_t resvd0115:15;
+                       uint32_t threshold:12;
+                       uint32_t resvd2831:4;
+               };
+       };
+};
+
+/**
+ * register a dma channel for vhost
+ *
+ * @param vid
+ *  id of the vhost device the DMA channel is to be attached to
+ * @param queue_id
+ *  id of the vhost queue the DMA channel is to be attached to
+ * @param features
+ *  DMA channel feature bit
+ *    b0       : DMA supports inorder data transfer
+ *    b1  - b15: reserved
+ *    b16 - b27: Packet length threshold for DMA transfer
+ *    b28 - b31: reserved
+ * @param ops
+ *  DMA operation callbacks
+ * @return
+ *  0 on success, -1 on failure
+ */
+int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
+       uint32_t features, struct rte_vhost_async_channel_ops *ops);
+
+/**
+ * unregister a dma channel for vhost
+ *
+ * @param vid
+ *  id of the vhost device the DMA channel is to be detached from
+ * @param queue_id
+ *  id of the vhost queue the DMA channel is to be detached from
+ * @return
+ *  0 on success, -1 on failure
+ */
+int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id);
+
+#endif /* _RTE_VHOST_ASYNC_H_ */
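
For illustration, here is a minimal application-side sketch of the new
API. The my_dma_copy()/my_dma_poll() helpers are hypothetical stand-ins
for a real DMA engine driver and queue id 0 is arbitrary; only the
structures and rte_vhost_async_* calls come from this patch:

#include <rte_vhost.h>
#include "rte_vhost_async.h"

/* hypothetical DMA backend helpers (not part of this patch) */
static int my_dma_copy(struct iov_it *src, struct iov_it *dst);
static int my_dma_poll(uint16_t max_packets);

static int
my_transfer_data(int vid, uint16_t queue_id,
		struct dma_trans_desc *descs,
		struct dma_trans_status *opaque_data,
		uint16_t count)
{
	uint16_t i;

	/* submit one scatter-gather copy job per descriptor pair */
	for (i = 0; i < count; i++)
		if (my_dma_copy(descs[i].src, descs[i].dst) < 0)
			break;

	return i; /* number of descs accepted */
}

static int
my_check_completed_copies(int vid, uint16_t queue_id,
		struct dma_trans_status *opaque_data,
		uint16_t max_packets)
{
	/* report how many previously submitted copies have finished */
	return my_dma_poll(max_packets);
}

static struct rte_vhost_async_channel_ops my_ops = {
	.transfer_data = my_transfer_data,
	.check_completed_copies = my_check_completed_copies,
};

static int
attach_async_channel(int vid)
{
	/* in-order channel; offload only packets above 256 bytes */
	struct dma_channel_features f = { .inorder = 1, .threshold = 256 };

	return rte_vhost_async_channel_register(vid, 0, f.intval, &my_ops);
}

If registration fails, the application can simply keep using the
existing synchronous copy path.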
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index 0a66ef9..f817783 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -42,6 +42,7 @@ struct vhost_user_socket {
        bool use_builtin_virtio_net;
        bool extbuf;
        bool linearbuf;
+       bool async_copy;
 
        /*
         * The "supported_features" indicates the feature bits the
@@ -210,6 +211,7 @@ struct vhost_user {
        size_t size;
        struct vhost_user_connection *conn;
        int ret;
+       struct virtio_net *dev;
 
        if (vsocket == NULL)
                return;
@@ -241,6 +243,13 @@ struct vhost_user {
        if (vsocket->linearbuf)
                vhost_enable_linearbuf(vid);
 
+       if (vsocket->async_copy) {
+               dev = get_device(vid);
+
+               if (dev)
+                       dev->async_copy = 1;
+       }
+
        VHOST_LOG_CONFIG(INFO, "new device, handle is %d\n", vid);
 
        if (vsocket->notify_ops->new_connection) {
@@ -891,6 +900,17 @@ struct vhost_user_reconnect_list {
                goto out_mutex;
        }
 
+       vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
+
+       if (vsocket->async_copy &&
+               (flags & (RTE_VHOST_USER_IOMMU_SUPPORT |
+               RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
+               VHOST_LOG_CONFIG(ERR, "error: enabling async copy and IOMMU "
+                       "or post-copy feature simultaneously is not "
+                       "supported\n");
+               goto out_mutex;
+       }
+
        /*
         * Set the supported features correctly for the builtin vhost-user
         * net driver.
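
For completeness, this is how an application would be expected to opt
in at socket registration time (a sketch; the socket path is
illustrative, and per the check above the flag must not be combined
with IOMMU or post-copy support):

static int
register_async_socket(const char *path)
{
	/* request async copy on this socket; combining it with
	 * RTE_VHOST_USER_IOMMU_SUPPORT or RTE_VHOST_USER_POSTCOPY_SUPPORT
	 * is rejected by the check above */
	return rte_vhost_driver_register(path, RTE_VHOST_USER_ASYNC_COPY);
}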
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 0266318..e6b688a 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -332,8 +332,13 @@
 {
        if (vq_is_packed(dev))
                rte_free(vq->shadow_used_packed);
-       else
+       else {
                rte_free(vq->shadow_used_split);
+               if (vq->async_pkts_pending)
+                       rte_free(vq->async_pkts_pending);
+               if (vq->async_pending_info)
+                       rte_free(vq->async_pending_info);
+       }
        rte_free(vq->batch_copy_elems);
        rte_mempool_free(vq->iotlb_pool);
        rte_free(vq);
@@ -1527,3 +1532,70 @@ int rte_vhost_extern_callback_register(int vid,
        if (vhost_data_log_level >= 0)
                rte_log_set_level(vhost_data_log_level, RTE_LOG_WARNING);
 }
+
+int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
+                                       uint32_t features,
+                                       struct rte_vhost_async_channel_ops *ops)
+{
+       struct vhost_virtqueue *vq;
+       struct virtio_net *dev = get_device(vid);
+       struct dma_channel_features f;
+
+       if (dev == NULL || ops == NULL)
+               return -1;
+
+       f.intval = features;
+
+       vq = dev->virtqueue[queue_id];
+
+       if (vq == NULL)
+               return -1;
+
+       /* packed queue is not supported */
+       if (vq_is_packed(dev) || !f.inorder)
+               return -1;
+
+       if (ops->check_completed_copies == NULL ||
+               ops->transfer_data == NULL)
+               return -1;
+
+       rte_spinlock_lock(&vq->access_lock);
+
+       vq->async_ops.check_completed_copies = ops->check_completed_copies;
+       vq->async_ops.transfer_data = ops->transfer_data;
+
+       vq->async_inorder = f.inorder;
+       vq->async_threshold = f.threshold;
+
+       vq->async_registered = true;
+
+       rte_spinlock_unlock(&vq->access_lock);
+
+       return 0;
+}
+
+int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
+{
+       struct vhost_virtqueue *vq;
+       struct virtio_net *dev = get_device(vid);
+
+       if (dev == NULL)
+               return -1;
+
+       vq = dev->virtqueue[queue_id];
+
+       if (vq == NULL)
+               return -1;
+
+       rte_spinlock_lock(&vq->access_lock);
+
+       vq->async_ops.transfer_data = NULL;
+       vq->async_ops.check_completed_copies = NULL;
+
+       vq->async_registered = false;
+
+       rte_spinlock_unlock(&vq->access_lock);
+
+       return 0;
+}
+
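The matching teardown would typically live in the application's
destroy_device callback (a sketch; struct vhost_device_ops is the
existing rte_vhost API and queue id 0 is illustrative):

static void
my_destroy_device(int vid)
{
	/* detach the DMA channel before the device goes away; the
	 * callbacks are cleared under the vq access_lock */
	rte_vhost_async_channel_unregister(vid, 0);
}

static const struct vhost_device_ops my_vhost_ops = {
	.destroy_device = my_destroy_device,
};

my_vhost_ops would be hooked up as usual through
rte_vhost_driver_callback_register().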
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index df98d15..a7fbe23 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -23,6 +23,8 @@
 #include "rte_vhost.h"
 #include "rte_vdpa.h"
 
+#include "rte_vhost_async.h"
+
 /* Used to indicate that the device is running on a data core */
 #define VIRTIO_DEV_RUNNING 1
 /* Used to indicate that the device is ready to operate */
@@ -39,6 +41,11 @@
 
 #define VHOST_LOG_CACHE_NR 32
 
+#define MAX_PKT_BURST 32
+
+#define VHOST_MAX_ASYNC_IT (MAX_PKT_BURST * 2)
+#define VHOST_MAX_ASYNC_VEC (BUF_VECTOR_MAX * 2)
+
 #define PACKED_DESC_ENQUEUE_USED_FLAG(w)       \
        ((w) ? (VRING_DESC_F_AVAIL | VRING_DESC_F_USED | VRING_DESC_F_WRITE) : \
                VRING_DESC_F_WRITE)
@@ -200,6 +207,25 @@ struct vhost_virtqueue {
        TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list;
        int                             iotlb_cache_nr;
        TAILQ_HEAD(, vhost_iotlb_entry) iotlb_pending_list;
+
+       /* operation callbacks for async dma */
+       struct rte_vhost_async_channel_ops      async_ops;
+
+       struct iov_it it_pool[VHOST_MAX_ASYNC_IT];
+       struct iovec vec_pool[VHOST_MAX_ASYNC_VEC];
+
+       /* async data transfer status */
+       uintptr_t       **async_pkts_pending;
+       #define         ASYNC_PENDING_INFO_N_MSK 0xFFFF
+       #define         ASYNC_PENDING_INFO_N_SFT 16
+       uint64_t        *async_pending_info;
+       uint16_t        async_pkts_idx;
+       uint16_t        async_pkts_inflight_n;
+
+       /* vq async features */
+       bool            async_inorder;
+       bool            async_registered;
+       uint16_t        async_threshold;
 } __rte_cache_aligned;
 
 /* Old kernels have no such macros defined */
@@ -353,6 +379,7 @@ struct virtio_net {
        int16_t                 broadcast_rarp;
        uint32_t                nr_vring;
        int                     dequeue_zero_copy;
+       int                     async_copy;
        int                     extbuf;
        int                     linearbuf;
        struct vhost_virtqueue  *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
@@ -702,7 +729,8 @@ uint64_t translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
        /* Don't kick guest if we don't reach index specified by guest. */
        if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
                uint16_t old = vq->signalled_used;
-               uint16_t new = vq->last_used_idx;
+               uint16_t new = vq->async_pkts_inflight_n ?
+                                       vq->used->idx : vq->last_used_idx;
                bool signalled_used_valid = vq->signalled_used_valid;
 
                vq->signalled_used = new;
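
The ASYNC_PENDING_INFO_N_MSK/_SFT macros above suggest that each
async_pending_info entry packs two 16-bit counts into one word. The
consumer code is not part of this patch, so the field meanings below
are an assumption, shown only to illustrate the mask/shift convention:

/* assumed layout: packet count in the upper bits, remaining segment
 * count in the low 16 bits -- purely illustrative */
static inline uint64_t
pending_info_pack(uint64_t n_pkts, uint64_t n_segs)
{
	return (n_pkts << ASYNC_PENDING_INFO_N_SFT) |
		(n_segs & ASYNC_PENDING_INFO_N_MSK);
}

static inline uint16_t
pending_info_segs(uint64_t info)
{
	return info & ASYNC_PENDING_INFO_N_MSK;
}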
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 84bebad..d7600bf 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -464,12 +464,25 @@
        } else {
                if (vq->shadow_used_split)
                        rte_free(vq->shadow_used_split);
+               if (vq->async_pkts_pending)
+                       rte_free(vq->async_pkts_pending);
+               if (vq->async_pending_info)
+                       rte_free(vq->async_pending_info);
+
                vq->shadow_used_split = rte_malloc(NULL,
                                vq->size * sizeof(struct vring_used_elem),
                                RTE_CACHE_LINE_SIZE);
-               if (!vq->shadow_used_split) {
+               vq->async_pkts_pending = rte_malloc(NULL,
+                               vq->size * sizeof(uintptr_t),
+                               RTE_CACHE_LINE_SIZE);
+               vq->async_pending_info = rte_malloc(NULL,
+                               vq->size * sizeof(uint64_t),
+                               RTE_CACHE_LINE_SIZE);
+               if (!vq->shadow_used_split ||
+                       !vq->async_pkts_pending ||
+                       !vq->async_pending_info) {
                        VHOST_LOG_CONFIG(ERR,
-                                       "failed to allocate memory for shadow used ring.\n");
+                                       "failed to allocate memory for vq internal data.\n");
                        return RTE_VHOST_MSG_RESULT_ERR;
                }
        }
@@ -1147,7 +1160,8 @@
                        goto err_mmap;
                }
 
-               populate = (dev->dequeue_zero_copy) ? MAP_POPULATE : 0;
+               populate = (dev->dequeue_zero_copy || dev->async_copy) ?
+                       MAP_POPULATE : 0;
                mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
                                 MAP_SHARED | populate, fd, 0);
 
@@ -1162,7 +1176,7 @@
                reg->host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
                                      mmap_offset;
 
-               if (dev->dequeue_zero_copy)
+               if (dev->dequeue_zero_copy || dev->async_copy)
                        if (add_guest_pages(dev, reg, alignment) < 0) {
                                VHOST_LOG_CONFIG(ERR,
                                        "adding guest pages to region %u 
failed.\n",
@@ -1945,6 +1959,12 @@ static int vhost_user_set_vring_err(struct virtio_net **pdev __rte_unused,
        } else {
                rte_free(vq->shadow_used_split);
                vq->shadow_used_split = NULL;
+               if (vq->async_pkts_pending)
+                       rte_free(vq->async_pkts_pending);
+               if (vq->async_pending_info)
+                       rte_free(vq->async_pending_info);
+               vq->async_pkts_pending = NULL;
+               vq->async_pending_info = NULL;
        }
 
        rte_free(vq->batch_copy_elems);
-- 
1.8.3.1
