Hi Jiayu,

This is the first round of review; I'll spend time on the OVS patches later and
take another look.

> -----Original Message-----
> From: Hu, Jiayu <jiayu...@intel.com>
> Sent: Friday, December 31, 2021 5:55 AM
> To: dev@dpdk.org
> Cc: maxime.coque...@redhat.com; i.maxim...@ovn.org; Xia, Chenbo
> <chenbo....@intel.com>; Richardson, Bruce <bruce.richard...@intel.com>; Van
> Haaren, Harry <harry.van.haa...@intel.com>; Pai G, Sunil
> <sunil.pa...@intel.com>; Mcnamara, John <john.mcnam...@intel.com>; Ding, Xuan
> <xuan.d...@intel.com>; Jiang, Cheng1 <cheng1.ji...@intel.com>;
> lian...@liangbit.com; Hu, Jiayu <jiayu...@intel.com>
> Subject: [PATCH v1 1/1] vhost: integrate dmadev in asynchronous datapath
> 
> Since dmadev is introduced in 21.11, to avoid the overhead of vhost DMA
> abstraction layer and simplify application logics, this patch integrates
> dmadev in asynchronous data path.
> 
> Signed-off-by: Jiayu Hu <jiayu...@intel.com>
> Signed-off-by: Sunil Pai G <sunil.pa...@intel.com>
> ---
>  doc/guides/prog_guide/vhost_lib.rst |  70 ++++-----
>  examples/vhost/Makefile             |   2 +-
>  examples/vhost/ioat.c               | 218 --------------------------
>  examples/vhost/ioat.h               |  63 --------
>  examples/vhost/main.c               | 230 +++++++++++++++++++++++-----
>  examples/vhost/main.h               |  11 ++
>  examples/vhost/meson.build          |   6 +-
>  lib/vhost/meson.build               |   3 +-
>  lib/vhost/rte_vhost_async.h         | 121 +++++----------
>  lib/vhost/version.map               |   3 +
>  lib/vhost/vhost.c                   | 130 +++++++++++-----
>  lib/vhost/vhost.h                   |  53 ++++++-
>  lib/vhost/virtio_net.c              | 206 +++++++++++++++++++------
>  13 files changed, 587 insertions(+), 529 deletions(-)
>  delete mode 100644 examples/vhost/ioat.c
>  delete mode 100644 examples/vhost/ioat.h
> 
> diff --git a/doc/guides/prog_guide/vhost_lib.rst
> b/doc/guides/prog_guide/vhost_lib.rst
> index 76f5d303c9..bdce7cbf02 100644
> --- a/doc/guides/prog_guide/vhost_lib.rst
> +++ b/doc/guides/prog_guide/vhost_lib.rst
> @@ -218,38 +218,12 @@ The following is an overview of some key Vhost API
> functions:
> 
>    Enable or disable zero copy feature of the vhost crypto backend.
> 
> -* ``rte_vhost_async_channel_register(vid, queue_id, config, ops)``
> +* ``rte_vhost_async_channel_register(vid, queue_id)``
> 
>    Register an async copy device channel for a vhost queue after vring

Since dmadev is used now, let's just say 'DMA device' instead of 'copy device'.

> -  is enabled. Following device ``config`` must be specified together
> -  with the registration:
> +  is enabled.
> 
> -  * ``features``
> -
> -    This field is used to specify async copy device features.
> -
> -    ``RTE_VHOST_ASYNC_INORDER`` represents the async copy device can
> -    guarantee the order of copy completion is the same as the order
> -    of copy submission.
> -
> -    Currently, only ``RTE_VHOST_ASYNC_INORDER`` capable device is
> -    supported by vhost.
> -
> -  Applications must provide following ``ops`` callbacks for vhost lib to
> -  work with the async copy devices:
> -
> -  * ``transfer_data(vid, queue_id, descs, opaque_data, count)``
> -
> -    vhost invokes this function to submit copy data to the async devices.
> -    For non-async_inorder capable devices, ``opaque_data`` could be used
> -    for identifying the completed packets.
> -
> -  * ``check_completed_copies(vid, queue_id, opaque_data, max_packets)``
> -
> -    vhost invokes this function to get the copy data completed by async
> -    devices.
> -
> -* ``rte_vhost_async_channel_register_thread_unsafe(vid, queue_id, config,
> ops)``
> +* ``rte_vhost_async_channel_register_thread_unsafe(vid, queue_id)``
> 
>    Register an async copy device channel for a vhost queue without
>    performing any locking.
> @@ -277,18 +251,13 @@ The following is an overview of some key Vhost API
> functions:
>    This function is only safe to call in vhost callback functions
>    (i.e., struct rte_vhost_device_ops).
> 
> -* ``rte_vhost_submit_enqueue_burst(vid, queue_id, pkts, count, comp_pkts,
> comp_count)``
> +* ``rte_vhost_submit_enqueue_burst(vid, queue_id, pkts, count, dma_id,
> dma_vchan)``
> 
>    Submit an enqueue request to transmit ``count`` packets from host to guest
> -  by async data path. Successfully enqueued packets can be transfer completed
> -  or being occupied by DMA engines; transfer completed packets are returned
> in
> -  ``comp_pkts``, but others are not guaranteed to finish, when this API
> -  call returns.
> +  by async data path. Applications must not free the packets submitted for
> +  enqueue until the packets are completed.
> 
> -  Applications must not free the packets submitted for enqueue until the
> -  packets are completed.
> -
> -* ``rte_vhost_poll_enqueue_completed(vid, queue_id, pkts, count)``
> +* ``rte_vhost_poll_enqueue_completed(vid, queue_id, pkts, count, dma_id,
> dma_vchan)``
> 
>    Poll enqueue completion status from async data path. Completed packets
>    are returned to applications through ``pkts``.
> @@ -298,7 +267,7 @@ The following is an overview of some key Vhost API
> functions:
>    This function returns the amount of in-flight packets for the vhost
>    queue using async acceleration.
> 
> -* ``rte_vhost_clear_queue_thread_unsafe(vid, queue_id, **pkts, count)``
> +* ``rte_vhost_clear_queue_thread_unsafe(vid, queue_id, **pkts, count, dma_id,
> dma_vchan)``
> 
>    Clear inflight packets which are submitted to DMA engine in vhost async
> data
>    path. Completed packets are returned to applications through ``pkts``.
> @@ -442,3 +411,26 @@ Finally, a set of device ops is defined for device
> specific operations:
>  * ``get_notify_area``
> 
>    Called to get the notify area info of the queue.
> +
> +Vhost asynchronous data path
> +----------------------------
> +
> +Vhost asynchronous data path leverages DMA devices to offload memory
> +copies from the CPU and it is implemented in an asynchronous way. It
> +enables applcations, like OVS, to save CPU cycles and hide memory copy
> +overhead, thus achieving higher throughput.
> +
> +Vhost doesn't manage DMA devices and applications, like OVS, need to
> +manage and configure DMA devices. Applications need to tell vhost what
> +DMA devices to use in every data path function call. This design enables
> +the flexibility for applications to dynamically use DMA channels in
> +different function modules, not limited in vhost.
> +
> +In addition, vhost supports M:N mapping between vrings and DMA virtual
> +channels. Specifically, one vring can use multiple different DMA channels
> +and one DMA channel can be shared by multiple vrings at the same time.
> +The reason of enabling one vring to use multiple DMA channels is that
> +it's possible that more than one dataplane threads enqueue packets to
> +the same vring with their own DMA virtual channels. Besides, the number
> +of DMA devices is limited. For the purpose of scaling, it's necessary to
> +support sharing DMA channels among vrings.
> diff --git a/examples/vhost/Makefile b/examples/vhost/Makefile
> index 587ea2ab47..975a5dfe40 100644
> --- a/examples/vhost/Makefile
> +++ b/examples/vhost/Makefile
> @@ -5,7 +5,7 @@
>  APP = vhost-switch
> 
>  # all source are stored in SRCS-y
> -SRCS-y := main.c virtio_net.c ioat.c
> +SRCS-y := main.c virtio_net.c
> 
>  PKGCONF ?= pkg-config
> 
> diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
> deleted file mode 100644
> index 9aeeb12fd9..0000000000
> --- a/examples/vhost/ioat.c
> +++ /dev/null
> @@ -1,218 +0,0 @@
> -/* SPDX-License-Identifier: BSD-3-Clause
> - * Copyright(c) 2010-2020 Intel Corporation
> - */
> -
> -#include <sys/uio.h>
> -#ifdef RTE_RAW_IOAT
> -#include <rte_rawdev.h>
> -#include <rte_ioat_rawdev.h>
> -
> -#include "ioat.h"
> -#include "main.h"
> -
> -struct dma_for_vhost dma_bind[MAX_VHOST_DEVICE];
> -
> -struct packet_tracker {
> -     unsigned short size_track[MAX_ENQUEUED_SIZE];
> -     unsigned short next_read;
> -     unsigned short next_write;
> -     unsigned short last_remain;
> -     unsigned short ioat_space;
> -};
> -
> -struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
> -
> -int
> -open_ioat(const char *value)
> -{
> -     struct dma_for_vhost *dma_info = dma_bind;
> -     char *input = strndup(value, strlen(value) + 1);
> -     char *addrs = input;
> -     char *ptrs[2];
> -     char *start, *end, *substr;
> -     int64_t vid, vring_id;
> -     struct rte_ioat_rawdev_config config;
> -     struct rte_rawdev_info info = { .dev_private = &config };
> -     char name[32];
> -     int dev_id;
> -     int ret = 0;
> -     uint16_t i = 0;
> -     char *dma_arg[MAX_VHOST_DEVICE];
> -     int args_nr;
> -
> -     while (isblank(*addrs))
> -             addrs++;
> -     if (*addrs == '\0') {
> -             ret = -1;
> -             goto out;
> -     }
> -
> -     /* process DMA devices within bracket. */
> -     addrs++;
> -     substr = strtok(addrs, ";]");
> -     if (!substr) {
> -             ret = -1;
> -             goto out;
> -     }
> -     args_nr = rte_strsplit(substr, strlen(substr),
> -                     dma_arg, MAX_VHOST_DEVICE, ',');
> -     if (args_nr <= 0) {
> -             ret = -1;
> -             goto out;
> -     }
> -     while (i < args_nr) {
> -             char *arg_temp = dma_arg[i];
> -             uint8_t sub_nr;
> -             sub_nr = rte_strsplit(arg_temp, strlen(arg_temp), ptrs, 2, '@');
> -             if (sub_nr != 2) {
> -                     ret = -1;
> -                     goto out;
> -             }
> -
> -             start = strstr(ptrs[0], "txd");
> -             if (start == NULL) {
> -                     ret = -1;
> -                     goto out;
> -             }
> -
> -             start += 3;
> -             vid = strtol(start, &end, 0);
> -             if (end == start) {
> -                     ret = -1;
> -                     goto out;
> -             }
> -
> -             vring_id = 0 + VIRTIO_RXQ;
> -             if (rte_pci_addr_parse(ptrs[1],
> -                             &(dma_info + vid)->dmas[vring_id].addr) < 0) {
> -                     ret = -1;
> -                     goto out;
> -             }
> -
> -             rte_pci_device_name(&(dma_info + vid)->dmas[vring_id].addr,
> -                             name, sizeof(name));
> -             dev_id = rte_rawdev_get_dev_id(name);
> -             if (dev_id == (uint16_t)(-ENODEV) ||
> -             dev_id == (uint16_t)(-EINVAL)) {
> -                     ret = -1;
> -                     goto out;
> -             }
> -
> -             if (rte_rawdev_info_get(dev_id, &info, sizeof(config)) < 0 ||
> -             strstr(info.driver_name, "ioat") == NULL) {
> -                     ret = -1;
> -                     goto out;
> -             }
> -
> -             (dma_info + vid)->dmas[vring_id].dev_id = dev_id;
> -             (dma_info + vid)->dmas[vring_id].is_valid = true;
> -             config.ring_size = IOAT_RING_SIZE;
> -             config.hdls_disable = true;
> -             if (rte_rawdev_configure(dev_id, &info, sizeof(config)) < 0) {
> -                     ret = -1;
> -                     goto out;
> -             }
> -             rte_rawdev_start(dev_id);
> -             cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE - 1;
> -             dma_info->nr++;
> -             i++;
> -     }
> -out:
> -     free(input);
> -     return ret;
> -}
> -
> -int32_t
> -ioat_transfer_data_cb(int vid, uint16_t queue_id,
> -             struct rte_vhost_iov_iter *iov_iter,
> -             struct rte_vhost_async_status *opaque_data, uint16_t count)
> -{
> -     uint32_t i_iter;
> -     uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2 + VIRTIO_RXQ].dev_id;
> -     struct rte_vhost_iov_iter *iter = NULL;
> -     unsigned long i_seg;
> -     unsigned short mask = MAX_ENQUEUED_SIZE - 1;
> -     unsigned short write = cb_tracker[dev_id].next_write;
> -
> -     if (!opaque_data) {
> -             for (i_iter = 0; i_iter < count; i_iter++) {
> -                     iter = iov_iter + i_iter;
> -                     i_seg = 0;
> -                     if (cb_tracker[dev_id].ioat_space < iter->nr_segs)
> -                             break;
> -                     while (i_seg < iter->nr_segs) {
> -                             rte_ioat_enqueue_copy(dev_id,
> -                                     (uintptr_t)(iter->iov[i_seg].src_addr),
> -                                     (uintptr_t)(iter->iov[i_seg].dst_addr),
> -                                     iter->iov[i_seg].len,
> -                                     0,
> -                                     0);
> -                             i_seg++;
> -                     }
> -                     write &= mask;
> -                     cb_tracker[dev_id].size_track[write] = iter->nr_segs;
> -                     cb_tracker[dev_id].ioat_space -= iter->nr_segs;
> -                     write++;
> -             }
> -     } else {
> -             /* Opaque data is not supported */
> -             return -1;
> -     }
> -     /* ring the doorbell */
> -     rte_ioat_perform_ops(dev_id);
> -     cb_tracker[dev_id].next_write = write;
> -     return i_iter;
> -}
> -
> -int32_t
> -ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
> -             struct rte_vhost_async_status *opaque_data,
> -             uint16_t max_packets)
> -{
> -     if (!opaque_data) {
> -             uintptr_t dump[255];
> -             int n_seg;
> -             unsigned short read, write;
> -             unsigned short nb_packet = 0;
> -             unsigned short mask = MAX_ENQUEUED_SIZE - 1;
> -             unsigned short i;
> -
> -             uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2
> -                             + VIRTIO_RXQ].dev_id;
> -             n_seg = rte_ioat_completed_ops(dev_id, 255, NULL, NULL, dump,
> dump);
> -             if (n_seg < 0) {
> -                     RTE_LOG(ERR,
> -                             VHOST_DATA,
> -                             "fail to poll completed buf on IOAT device %u",
> -                             dev_id);
> -                     return 0;
> -             }
> -             if (n_seg == 0)
> -                     return 0;
> -
> -             cb_tracker[dev_id].ioat_space += n_seg;
> -             n_seg += cb_tracker[dev_id].last_remain;
> -
> -             read = cb_tracker[dev_id].next_read;
> -             write = cb_tracker[dev_id].next_write;
> -             for (i = 0; i < max_packets; i++) {
> -                     read &= mask;
> -                     if (read == write)
> -                             break;
> -                     if (n_seg >= cb_tracker[dev_id].size_track[read]) {
> -                             n_seg -= cb_tracker[dev_id].size_track[read];
> -                             read++;
> -                             nb_packet++;
> -                     } else {
> -                             break;
> -                     }
> -             }
> -             cb_tracker[dev_id].next_read = read;
> -             cb_tracker[dev_id].last_remain = n_seg;
> -             return nb_packet;
> -     }
> -     /* Opaque data is not supported */
> -     return -1;
> -}
> -
> -#endif /* RTE_RAW_IOAT */
> diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
> deleted file mode 100644
> index d9bf717e8d..0000000000
> --- a/examples/vhost/ioat.h
> +++ /dev/null
> @@ -1,63 +0,0 @@
> -/* SPDX-License-Identifier: BSD-3-Clause
> - * Copyright(c) 2010-2020 Intel Corporation
> - */
> -
> -#ifndef _IOAT_H_
> -#define _IOAT_H_
> -
> -#include <rte_vhost.h>
> -#include <rte_pci.h>
> -#include <rte_vhost_async.h>
> -
> -#define MAX_VHOST_DEVICE 1024
> -#define IOAT_RING_SIZE 4096
> -#define MAX_ENQUEUED_SIZE 4096
> -
> -struct dma_info {
> -     struct rte_pci_addr addr;
> -     uint16_t dev_id;
> -     bool is_valid;
> -};
> -
> -struct dma_for_vhost {
> -     struct dma_info dmas[RTE_MAX_QUEUES_PER_PORT * 2];
> -     uint16_t nr;
> -};
> -
> -#ifdef RTE_RAW_IOAT
> -int open_ioat(const char *value);
> -
> -int32_t
> -ioat_transfer_data_cb(int vid, uint16_t queue_id,
> -             struct rte_vhost_iov_iter *iov_iter,
> -             struct rte_vhost_async_status *opaque_data, uint16_t count);
> -
> -int32_t
> -ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
> -             struct rte_vhost_async_status *opaque_data,
> -             uint16_t max_packets);
> -#else
> -static int open_ioat(const char *value __rte_unused)
> -{
> -     return -1;
> -}
> -
> -static int32_t
> -ioat_transfer_data_cb(int vid __rte_unused, uint16_t queue_id __rte_unused,
> -             struct rte_vhost_iov_iter *iov_iter __rte_unused,
> -             struct rte_vhost_async_status *opaque_data __rte_unused,
> -             uint16_t count __rte_unused)
> -{
> -     return -1;
> -}
> -
> -static int32_t
> -ioat_check_completed_copies_cb(int vid __rte_unused,
> -             uint16_t queue_id __rte_unused,
> -             struct rte_vhost_async_status *opaque_data __rte_unused,
> -             uint16_t max_packets __rte_unused)
> -{
> -     return -1;
> -}
> -#endif
> -#endif /* _IOAT_H_ */
> diff --git a/examples/vhost/main.c b/examples/vhost/main.c
> index 33d023aa39..44073499bc 100644
> --- a/examples/vhost/main.c
> +++ b/examples/vhost/main.c
> @@ -24,8 +24,9 @@
>  #include <rte_ip.h>
>  #include <rte_tcp.h>
>  #include <rte_pause.h>
> +#include <rte_dmadev.h>
> +#include <rte_vhost_async.h>
> 
> -#include "ioat.h"
>  #include "main.h"
> 
>  #ifndef MAX_QUEUES
> @@ -56,6 +57,14 @@
>  #define RTE_TEST_TX_DESC_DEFAULT 512
> 
>  #define INVALID_PORT_ID 0xFF
> +#define INVALID_DMA_ID -1
> +
> +#define MAX_VHOST_DEVICE 1024
> +#define DMA_RING_SIZE 4096
> +
> +struct dma_for_vhost dma_bind[MAX_VHOST_DEVICE];
> +struct rte_vhost_async_dma_info dma_config[RTE_DMADEV_DEFAULT_MAX];
> +static int dma_count;
> 
>  /* mask of enabled ports */
>  static uint32_t enabled_port_mask = 0;
> @@ -96,8 +105,6 @@ static int builtin_net_driver;
> 
>  static int async_vhost_driver;
> 
> -static char *dma_type;
> -
>  /* Specify timeout (in useconds) between retries on RX. */
>  static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
>  /* Specify the number of retries on RX. */
> @@ -196,13 +203,134 @@ struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE *
> MAX_VHOST_DEVICE];
>  #define MBUF_TABLE_DRAIN_TSC ((rte_get_tsc_hz() + US_PER_S - 1) \
>                                / US_PER_S * BURST_TX_DRAIN_US)
> 
> +static inline bool
> +is_dma_configured(int16_t dev_id)
> +{
> +     int i;
> +
> +     for (i = 0; i < dma_count; i++) {
> +             if (dma_config[i].dev_id == dev_id) {
> +                     return true;
> +             }
> +     }
> +     return false;
> +}
> +
>  static inline int
>  open_dma(const char *value)
>  {
> -     if (dma_type != NULL && strncmp(dma_type, "ioat", 4) == 0)
> -             return open_ioat(value);
> +     struct dma_for_vhost *dma_info = dma_bind;
> +     char *input = strndup(value, strlen(value) + 1);
> +     char *addrs = input;
> +     char *ptrs[2];
> +     char *start, *end, *substr;
> +     int64_t vid, vring_id;
> +
> +     struct rte_dma_info info;
> +     struct rte_dma_conf dev_config = { .nb_vchans = 1 };
> +     struct rte_dma_vchan_conf qconf = {
> +             .direction = RTE_DMA_DIR_MEM_TO_MEM,
> +             .nb_desc = DMA_RING_SIZE
> +     };
> +
> +     int dev_id;
> +     int ret = 0;
> +     uint16_t i = 0;
> +     char *dma_arg[MAX_VHOST_DEVICE];
> +     int args_nr;
> +
> +     while (isblank(*addrs))
> +             addrs++;
> +     if (*addrs == '\0') {
> +             ret = -1;
> +             goto out;
> +     }
> +
> +     /* process DMA devices within bracket. */
> +     addrs++;
> +     substr = strtok(addrs, ";]");
> +     if (!substr) {
> +             ret = -1;
> +             goto out;
> +     }
> +
> +     args_nr = rte_strsplit(substr, strlen(substr),
> +                     dma_arg, MAX_VHOST_DEVICE, ',');
> +     if (args_nr <= 0) {
> +             ret = -1;
> +             goto out;
> +     }
> +
> +     while (i < args_nr) {
> +             char *arg_temp = dma_arg[i];
> +             uint8_t sub_nr;
> +
> +             sub_nr = rte_strsplit(arg_temp, strlen(arg_temp), ptrs, 2, '@');
> +             if (sub_nr != 2) {
> +                     ret = -1;
> +                     goto out;
> +             }
> +
> +             start = strstr(ptrs[0], "txd");
> +             if (start == NULL) {
> +                     ret = -1;
> +                     goto out;
> +             }
> +
> +             start += 3;
> +             vid = strtol(start, &end, 0);
> +             if (end == start) {
> +                     ret = -1;
> +                     goto out;
> +             }
> +
> +             vring_id = 0 + VIRTIO_RXQ;

No need to introduce vring_id; it's always VIRTIO_RXQ here.

> +
> +             dev_id = rte_dma_get_dev_id_by_name(ptrs[1]);
> +             if (dev_id < 0) {
> +                     RTE_LOG(ERR, VHOST_CONFIG, "Fail to find DMA %s.\n",
> ptrs[1]);
> +                     ret = -1;
> +                     goto out;
> +             } else if (is_dma_configured(dev_id)) {
> +                     goto done;
> +             }
> +

Please call rte_dma_info_get() before configuring the device, to make sure that
info.max_vchans >= 1.
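
Something along these lines, reusing the 'info' variable already declared in
open_dma() (just a sketch; the log wording is only an example):

        if (rte_dma_info_get(dev_id, &info) != 0 || info.max_vchans < 1) {
                RTE_LOG(ERR, VHOST_CONFIG, "No vchans on DMA %d.\n", dev_id);
                ret = -1;
                goto out;
        }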

> +             if (rte_dma_configure(dev_id, &dev_config) != 0) {
> +                     RTE_LOG(ERR, VHOST_CONFIG, "Fail to configure DMA 
> %d.\n",
> dev_id);
> +                     ret = -1;
> +                     goto out;
> +             }
> +
> +             if (rte_dma_vchan_setup(dev_id, 0, &qconf) != 0) {
> +                     RTE_LOG(ERR, VHOST_CONFIG, "Fail to set up DMA %d.\n",
> dev_id);
> +                     ret = -1;
> +                     goto out;
> +             }
> 
> -     return -1;
> +             rte_dma_info_get(dev_id, &info);
> +             if (info.nb_vchans != 1) {
> +                     RTE_LOG(ERR, VHOST_CONFIG, "DMA %d has no queues.\n",
> dev_id);

Then the log message above should say that the number of vchans is not
configured, rather than that the device has no queues.

> +                     ret = -1;
> +                     goto out;
> +             }
> +
> +             if (rte_dma_start(dev_id) != 0) {
> +                     RTE_LOG(ERR, VHOST_CONFIG, "Fail to start DMA %u.\n",
> dev_id);
> +                     ret = -1;
> +                     goto out;
> +             }
> +
> +             dma_config[dma_count].dev_id = dev_id;
> +             dma_config[dma_count].max_vchans = 1;
> +             dma_config[dma_count++].max_desc = DMA_RING_SIZE;
> +
> +done:
> +             (dma_info + vid)->dmas[vring_id].dev_id = dev_id;
> +             i++;
> +     }
> +out:
> +     free(input);
> +     return ret;
>  }
> 
>  /*
> @@ -500,8 +628,6 @@ enum {
>       OPT_CLIENT_NUM,
>  #define OPT_BUILTIN_NET_DRIVER  "builtin-net-driver"
>       OPT_BUILTIN_NET_DRIVER_NUM,
> -#define OPT_DMA_TYPE            "dma-type"
> -     OPT_DMA_TYPE_NUM,
>  #define OPT_DMAS                "dmas"
>       OPT_DMAS_NUM,
>  };
> @@ -539,8 +665,6 @@ us_vhost_parse_args(int argc, char **argv)
>                               NULL, OPT_CLIENT_NUM},
>               {OPT_BUILTIN_NET_DRIVER, no_argument,
>                               NULL, OPT_BUILTIN_NET_DRIVER_NUM},
> -             {OPT_DMA_TYPE, required_argument,
> -                             NULL, OPT_DMA_TYPE_NUM},
>               {OPT_DMAS, required_argument,
>                               NULL, OPT_DMAS_NUM},
>               {NULL, 0, 0, 0},
> @@ -661,10 +785,6 @@ us_vhost_parse_args(int argc, char **argv)
>                       }
>                       break;
> 
> -             case OPT_DMA_TYPE_NUM:
> -                     dma_type = optarg;
> -                     break;
> -
>               case OPT_DMAS_NUM:
>                       if (open_dma(optarg) == -1) {
>                               RTE_LOG(INFO, VHOST_CONFIG,
> @@ -841,9 +961,10 @@ complete_async_pkts(struct vhost_dev *vdev)
>  {
>       struct rte_mbuf *p_cpl[MAX_PKT_BURST];
>       uint16_t complete_count;
> +     int16_t dma_id = dma_bind[vdev->vid].dmas[VIRTIO_RXQ].dev_id;
> 
>       complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
> -                                     VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
> +                                     VIRTIO_RXQ, p_cpl, MAX_PKT_BURST, 
> dma_id, 0);
>       if (complete_count) {
>               free_pkts(p_cpl, complete_count);
>               __atomic_sub_fetch(&vdev->pkts_inflight, complete_count,
> __ATOMIC_SEQ_CST);
> @@ -883,11 +1004,12 @@ drain_vhost(struct vhost_dev *vdev)
> 
>       if (builtin_net_driver) {
>               ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
> -     } else if (async_vhost_driver) {
> +     } else if (dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled) {
>               uint16_t enqueue_fail = 0;
> +             int16_t dma_id = dma_bind[vdev->vid].dmas[VIRTIO_RXQ].dev_id;
> 
>               complete_async_pkts(vdev);
> -             ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ, m,
> nr_xmit);
> +             ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ, m,
> nr_xmit, dma_id, 0);
>               __atomic_add_fetch(&vdev->pkts_inflight, ret, __ATOMIC_SEQ_CST);
> 
>               enqueue_fail = nr_xmit - ret;
> @@ -905,7 +1027,7 @@ drain_vhost(struct vhost_dev *vdev)
>                               __ATOMIC_SEQ_CST);
>       }
> 
> -     if (!async_vhost_driver)
> +     if (!dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled)
>               free_pkts(m, nr_xmit);
>  }
> 
> @@ -1211,12 +1333,13 @@ drain_eth_rx(struct vhost_dev *vdev)
>       if (builtin_net_driver) {
>               enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
>                                               pkts, rx_count);
> -     } else if (async_vhost_driver) {
> +     } else if (dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled) {
>               uint16_t enqueue_fail = 0;
> +             int16_t dma_id = dma_bind[vdev->vid].dmas[VIRTIO_RXQ].dev_id;
> 
>               complete_async_pkts(vdev);
>               enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
> -                                     VIRTIO_RXQ, pkts, rx_count);
> +                                     VIRTIO_RXQ, pkts, rx_count, dma_id, 0);
>               __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count,
> __ATOMIC_SEQ_CST);
> 
>               enqueue_fail = rx_count - enqueue_count;
> @@ -1235,7 +1358,7 @@ drain_eth_rx(struct vhost_dev *vdev)
>                               __ATOMIC_SEQ_CST);
>       }
> 
> -     if (!async_vhost_driver)
> +     if (!dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled)
>               free_pkts(pkts, rx_count);
>  }
> 
> @@ -1387,18 +1510,20 @@ destroy_device(int vid)
>               "(%d) device has been removed from data core\n",
>               vdev->vid);
> 
> -     if (async_vhost_driver) {
> +     if (dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled) {
>               uint16_t n_pkt = 0;
> +             int16_t dma_id = dma_bind[vid].dmas[VIRTIO_RXQ].dev_id;
>               struct rte_mbuf *m_cpl[vdev->pkts_inflight];
> 
>               while (vdev->pkts_inflight) {
>                       n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, 
> VIRTIO_RXQ,
> -                                             m_cpl, vdev->pkts_inflight);
> +                                             m_cpl, vdev->pkts_inflight, 
> dma_id, 0);
>                       free_pkts(m_cpl, n_pkt);
>                       __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt,
> __ATOMIC_SEQ_CST);
>               }
> 
>               rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
> +             dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled = false;
>       }
> 
>       rte_free(vdev);
> @@ -1468,20 +1593,14 @@ new_device(int vid)
>               "(%d) device has been added to data core %d\n",
>               vid, vdev->coreid);
> 
> -     if (async_vhost_driver) {
> -             struct rte_vhost_async_config config = {0};
> -             struct rte_vhost_async_channel_ops channel_ops;
> -
> -             if (dma_type != NULL && strncmp(dma_type, "ioat", 4) == 0) {
> -                     channel_ops.transfer_data = ioat_transfer_data_cb;
> -                     channel_ops.check_completed_copies =
> -                             ioat_check_completed_copies_cb;
> -
> -                     config.features = RTE_VHOST_ASYNC_INORDER;
> +     if (dma_bind[vid].dmas[VIRTIO_RXQ].dev_id != INVALID_DMA_ID) {
> +             int ret;
> 
> -                     return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
> -                             config, &channel_ops);
> +             ret = rte_vhost_async_channel_register(vid, VIRTIO_RXQ);
> +             if (ret == 0) {
> +                     dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled = true;
>               }
> +             return ret;
>       }
> 
>       return 0;
> @@ -1502,14 +1621,15 @@ vring_state_changed(int vid, uint16_t queue_id, int
> enable)
>       if (queue_id != VIRTIO_RXQ)
>               return 0;
> 
> -     if (async_vhost_driver) {
> +     if (dma_bind[vid].dmas[queue_id].async_enabled) {
>               if (!enable) {
>                       uint16_t n_pkt = 0;
> +                     int16_t dma_id = dma_bind[vid].dmas[VIRTIO_RXQ].dev_id;
>                       struct rte_mbuf *m_cpl[vdev->pkts_inflight];
> 
>                       while (vdev->pkts_inflight) {
>                               n_pkt = rte_vhost_clear_queue_thread_unsafe(vid,
> queue_id,
> -                                                     m_cpl, 
> vdev->pkts_inflight);
> +                                                     m_cpl, 
> vdev->pkts_inflight, dma_id,
> 0);
>                               free_pkts(m_cpl, n_pkt);
>                               __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt,
> __ATOMIC_SEQ_CST);
>                       }
> @@ -1657,6 +1777,25 @@ create_mbuf_pool(uint16_t nr_port, uint32_t
> nr_switch_core, uint32_t mbuf_size,
>               rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
>  }
> 
> +static void
> +init_dma(void)
> +{
> +     int i;
> +
> +     for (i = 0; i < MAX_VHOST_DEVICE; i++) {
> +             int j;
> +
> +             for (j = 0; j < RTE_MAX_QUEUES_PER_PORT * 2; j++) {
> +                     dma_bind[i].dmas[j].dev_id = INVALID_DMA_ID;
> +                     dma_bind[i].dmas[j].async_enabled = false;
> +             }
> +     }
> +
> +     for (i = 0; i < RTE_DMADEV_DEFAULT_MAX; i++) {
> +             dma_config[i].dev_id = INVALID_DMA_ID;
> +     }
> +}
> +
>  /*
>   * Main function, does initialisation and calls the per-lcore functions.
>   */
> @@ -1679,6 +1818,9 @@ main(int argc, char *argv[])
>       argc -= ret;
>       argv += ret;
> 
> +     /* initialize dma structures */
> +     init_dma();
> +
>       /* parse app arguments */
>       ret = us_vhost_parse_args(argc, argv);
>       if (ret < 0)
> @@ -1754,6 +1896,20 @@ main(int argc, char *argv[])
>       if (client_mode)
>               flags |= RTE_VHOST_USER_CLIENT;
> 
> +     if (async_vhost_driver) {
> +             if (rte_vhost_async_dma_configure(dma_config, dma_count) < 0) {
> +                     RTE_LOG(ERR, VHOST_PORT, "Failed to configure DMA in
> vhost.\n");
> +                     for (i = 0; i < dma_count; i++) {
> +                             if (dma_config[i].dev_id != INVALID_DMA_ID) {
> +                                     rte_dma_stop(dma_config[i].dev_id);
> +                                     dma_config[i].dev_id = INVALID_DMA_ID;
> +                             }
> +                     }
> +                     dma_count = 0;
> +                     async_vhost_driver = false;
> +             }
> +     }
> +
>       /* Register vhost user driver to handle vhost messages. */
>       for (i = 0; i < nb_sockets; i++) {
>               char *file = socket_files + i * PATH_MAX;
> diff --git a/examples/vhost/main.h b/examples/vhost/main.h
> index e7b1ac60a6..b4a453e77e 100644
> --- a/examples/vhost/main.h
> +++ b/examples/vhost/main.h
> @@ -8,6 +8,7 @@
>  #include <sys/queue.h>
> 
>  #include <rte_ether.h>
> +#include <rte_pci.h>
> 
>  /* Macros for printing using RTE_LOG */
>  #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
> @@ -79,6 +80,16 @@ struct lcore_info {
>       struct vhost_dev_tailq_list vdev_list;
>  };
> 
> +struct dma_info {
> +     struct rte_pci_addr addr;
> +     int16_t dev_id;
> +     bool async_enabled;
> +};
> +
> +struct dma_for_vhost {
> +     struct dma_info dmas[RTE_MAX_QUEUES_PER_PORT * 2];
> +};
> +
>  /* we implement non-extra virtio net features */
>  #define VIRTIO_NET_FEATURES  0
> 
> diff --git a/examples/vhost/meson.build b/examples/vhost/meson.build
> index 3efd5e6540..87a637f83f 100644
> --- a/examples/vhost/meson.build
> +++ b/examples/vhost/meson.build
> @@ -12,13 +12,9 @@ if not is_linux
>  endif
> 
>  deps += 'vhost'
> +deps += 'dmadev'
>  allow_experimental_apis = true
>  sources = files(
>          'main.c',
>          'virtio_net.c',
>  )
> -
> -if dpdk_conf.has('RTE_RAW_IOAT')
> -    deps += 'raw_ioat'
> -    sources += files('ioat.c')
> -endif
> diff --git a/lib/vhost/meson.build b/lib/vhost/meson.build
> index cdb37a4814..8107329400 100644
> --- a/lib/vhost/meson.build
> +++ b/lib/vhost/meson.build
> @@ -33,7 +33,8 @@ headers = files(
>          'rte_vhost_async.h',
>          'rte_vhost_crypto.h',
>  )
> +
>  driver_sdk_headers = files(
>          'vdpa_driver.h',
>  )
> -deps += ['ethdev', 'cryptodev', 'hash', 'pci']
> +deps += ['ethdev', 'cryptodev', 'hash', 'pci', 'dmadev']
> diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
> index a87ea6ba37..23a7a2d8b3 100644
> --- a/lib/vhost/rte_vhost_async.h
> +++ b/lib/vhost/rte_vhost_async.h
> @@ -27,70 +27,12 @@ struct rte_vhost_iov_iter {
>  };
> 
>  /**
> - * dma transfer status
> + * DMA device information
>   */
> -struct rte_vhost_async_status {
> -     /** An array of application specific data for source memory */
> -     uintptr_t *src_opaque_data;
> -     /** An array of application specific data for destination memory */
> -     uintptr_t *dst_opaque_data;
> -};
> -
> -/**
> - * dma operation callbacks to be implemented by applications
> - */
> -struct rte_vhost_async_channel_ops {
> -     /**
> -      * instruct async engines to perform copies for a batch of packets
> -      *
> -      * @param vid
> -      *  id of vhost device to perform data copies
> -      * @param queue_id
> -      *  queue id to perform data copies
> -      * @param iov_iter
> -      *  an array of IOV iterators
> -      * @param opaque_data
> -      *  opaque data pair sending to DMA engine
> -      * @param count
> -      *  number of elements in the "descs" array
> -      * @return
> -      *  number of IOV iterators processed, negative value means error
> -      */
> -     int32_t (*transfer_data)(int vid, uint16_t queue_id,
> -             struct rte_vhost_iov_iter *iov_iter,
> -             struct rte_vhost_async_status *opaque_data,
> -             uint16_t count);
> -     /**
> -      * check copy-completed packets from the async engine
> -      * @param vid
> -      *  id of vhost device to check copy completion
> -      * @param queue_id
> -      *  queue id to check copy completion
> -      * @param opaque_data
> -      *  buffer to receive the opaque data pair from DMA engine
> -      * @param max_packets
> -      *  max number of packets could be completed
> -      * @return
> -      *  number of async descs completed, negative value means error
> -      */
> -     int32_t (*check_completed_copies)(int vid, uint16_t queue_id,
> -             struct rte_vhost_async_status *opaque_data,
> -             uint16_t max_packets);
> -};
> -
> -/**
> - *  async channel features
> - */
> -enum {
> -     RTE_VHOST_ASYNC_INORDER = 1U << 0,
> -};
> -
> -/**
> - *  async channel configuration
> - */
> -struct rte_vhost_async_config {
> -     uint32_t features;
> -     uint32_t rsvd[2];
> +struct rte_vhost_async_dma_info {
> +     int16_t dev_id; /* DMA device ID */
> +     uint16_t max_vchans;    /* max number of vchan */
> +     uint16_t max_desc;      /* max desc number of vchan */
>  };
> 
>  /**
> @@ -100,17 +42,11 @@ struct rte_vhost_async_config {
>   *  vhost device id async channel to be attached to
>   * @param queue_id
>   *  vhost queue id async channel to be attached to
> - * @param config
> - *  Async channel configuration structure
> - * @param ops
> - *  Async channel operation callbacks
>   * @return
>   *  0 on success, -1 on failures
>   */
>  __rte_experimental
> -int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
> -     struct rte_vhost_async_config config,
> -     struct rte_vhost_async_channel_ops *ops);
> +int rte_vhost_async_channel_register(int vid, uint16_t queue_id);
> 
>  /**
>   * Unregister an async channel for a vhost queue
> @@ -136,17 +72,11 @@ int rte_vhost_async_channel_unregister(int vid, uint16_t
> queue_id);
>   *  vhost device id async channel to be attached to
>   * @param queue_id
>   *  vhost queue id async channel to be attached to
> - * @param config
> - *  Async channel configuration
> - * @param ops
> - *  Async channel operation callbacks
>   * @return
>   *  0 on success, -1 on failures
>   */
>  __rte_experimental
> -int rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t 
> queue_id,
> -     struct rte_vhost_async_config config,
> -     struct rte_vhost_async_channel_ops *ops);
> +int rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t
> queue_id);
> 
>  /**
>   * Unregister an async channel for a vhost queue without performing any
> @@ -179,12 +109,17 @@ int rte_vhost_async_channel_unregister_thread_unsafe(int
> vid,
>   *  array of packets to be enqueued
>   * @param count
>   *  packets num to be enqueued
> + * @param dma_id
> + *  the identifier of the DMA device
> + * @param vchan
> + *  the identifier of virtual DMA channel
>   * @return
>   *  num of packets enqueued
>   */
>  __rte_experimental
>  uint16_t rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,
> -             struct rte_mbuf **pkts, uint16_t count);
> +             struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
> +             uint16_t vchan);

All dma_id parameters in these APIs should be uint16_t. Otherwise you need to
check that the value is valid.
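
If dma_id stays signed, the data-path functions would also need a check up
front, e.g. (only a sketch):

        if (unlikely(dma_id < 0 || !rte_dma_is_valid(dma_id))) {
                VHOST_LOG_DATA(ERR, "%s: invalid dma_id %d\n", __func__, dma_id);
                return 0;
        }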

> 
>  /**
>   * This function checks async completion status for a specific vhost
> @@ -199,12 +134,17 @@ uint16_t rte_vhost_submit_enqueue_burst(int vid,
> uint16_t queue_id,
>   *  blank array to get return packet pointer
>   * @param count
>   *  size of the packet array
> + * @param dma_id
> + *  the identifier of the DMA device
> + * @param vchan
> + *  the identifier of virtual DMA channel
>   * @return
>   *  num of packets returned
>   */
>  __rte_experimental
>  uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> -             struct rte_mbuf **pkts, uint16_t count);
> +             struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
> +             uint16_t vchan);
> 
>  /**
>   * This function returns the amount of in-flight packets for the vhost
> @@ -235,11 +175,32 @@ int rte_vhost_async_get_inflight(int vid, uint16_t
> queue_id);
>   *  Blank array to get return packet pointer
>   * @param count
>   *  Size of the packet array
> + * @param dma_id
> + *  the identifier of the DMA device
> + * @param vchan
> + *  the identifier of virtual DMA channel
>   * @return
>   *  Number of packets returned
>   */
>  __rte_experimental
>  uint16_t rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
> -             struct rte_mbuf **pkts, uint16_t count);
> +             struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
> +             uint16_t vchan);
> +/**
> + * The DMA vChannels used in asynchronous data path must be configured
> + * first. So this function needs to be called before enabling DMA
> + * acceleration for vring. If this function fails, asynchronous data path
> + * cannot be enabled for any vring further.
> + *
> + * @param dmas
> + *  DMA information
> + * @param count
> + *  Element number of 'dmas'
> + * @return
> + *  0 on success, and -1 on failure
> + */
> +__rte_experimental
> +int rte_vhost_async_dma_configure(struct rte_vhost_async_dma_info *dmas,
> +             uint16_t count);

I think that, based on the current design, vhost can use every vchan if the
user app lets it. So max_desc and max_vchans can just be obtained from the
dmadev APIs? Then there's no need to introduce the new ABI struct
rte_vhost_async_dma_info.

And about max_desc: looking at the dmadev lib, you can get the device's
max_desc, but you may use a smaller nb_desc (<= max_desc) to configure the
vchan. And IIUC, vhost wants to know the nb_desc instead of max_desc?
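
Roughly what I have in mind, only as a sketch (inside whatever per-device
configure call vhost ends up with; 'dma_id' is the device passed by the app):

        struct rte_dma_info info;

        if (rte_dma_info_get(dma_id, &info) != 0 || info.nb_vchans == 0)
                return -1;

        dma_copy_track[dma_id].max_vchans = info.max_vchans;
        /* info.max_desc is only an upper bound; the nb_desc actually used in
         * rte_dma_vchan_setup() is what vhost would need, hence the question
         * above.
         */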

> 
>  #endif /* _RTE_VHOST_ASYNC_H_ */
> diff --git a/lib/vhost/version.map b/lib/vhost/version.map
> index a7ef7f1976..1202ba9c1a 100644
> --- a/lib/vhost/version.map
> +++ b/lib/vhost/version.map
> @@ -84,6 +84,9 @@ EXPERIMENTAL {
> 
>       # added in 21.11
>       rte_vhost_get_monitor_addr;
> +
> +     # added in 22.03
> +     rte_vhost_async_dma_configure;
>  };
> 
>  INTERNAL {
> diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
> index 13a9bb9dd1..32f37f4851 100644
> --- a/lib/vhost/vhost.c
> +++ b/lib/vhost/vhost.c
> @@ -344,6 +344,7 @@ vhost_free_async_mem(struct vhost_virtqueue *vq)
>               return;
> 
>       rte_free(vq->async->pkts_info);
> +     rte_free(vq->async->pkts_cmpl_flag);
> 
>       rte_free(vq->async->buffers_packed);
>       vq->async->buffers_packed = NULL;
> @@ -1626,8 +1627,7 @@ rte_vhost_extern_callback_register(int vid,
>  }
> 
>  static __rte_always_inline int
> -async_channel_register(int vid, uint16_t queue_id,
> -             struct rte_vhost_async_channel_ops *ops)
> +async_channel_register(int vid, uint16_t queue_id)
>  {
>       struct virtio_net *dev = get_device(vid);
>       struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
> @@ -1656,6 +1656,14 @@ async_channel_register(int vid, uint16_t queue_id,
>               goto out_free_async;
>       }
> 
> +     async->pkts_cmpl_flag = rte_zmalloc_socket(NULL, vq->size * 
> sizeof(bool),
> +                     RTE_CACHE_LINE_SIZE, node);
> +     if (!async->pkts_cmpl_flag) {
> +             VHOST_LOG_CONFIG(ERR, "failed to allocate async pkts_cmpl_flag
> (vid %d, qid: %d)\n",
> +                             vid, queue_id);

qid: %u

> +             goto out_free_async;
> +     }
> +
>       if (vq_is_packed(dev)) {
>               async->buffers_packed = rte_malloc_socket(NULL,
>                               vq->size * sizeof(struct 
> vring_used_elem_packed),
> @@ -1676,9 +1684,6 @@ async_channel_register(int vid, uint16_t queue_id,
>               }
>       }
> 
> -     async->ops.check_completed_copies = ops->check_completed_copies;
> -     async->ops.transfer_data = ops->transfer_data;
> -
>       vq->async = async;
> 
>       return 0;
> @@ -1691,15 +1696,13 @@ async_channel_register(int vid, uint16_t queue_id,
>  }
> 
>  int
> -rte_vhost_async_channel_register(int vid, uint16_t queue_id,
> -             struct rte_vhost_async_config config,
> -             struct rte_vhost_async_channel_ops *ops)
> +rte_vhost_async_channel_register(int vid, uint16_t queue_id)
>  {
>       struct vhost_virtqueue *vq;
>       struct virtio_net *dev = get_device(vid);
>       int ret;
> 
> -     if (dev == NULL || ops == NULL)
> +     if (dev == NULL)
>               return -1;
> 
>       if (queue_id >= VHOST_MAX_VRING)
> @@ -1710,33 +1713,20 @@ rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
>       if (unlikely(vq == NULL || !dev->async_copy))
>               return -1;
> 
> -     if (unlikely(!(config.features & RTE_VHOST_ASYNC_INORDER))) {
> -             VHOST_LOG_CONFIG(ERR,
> -                     "async copy is not supported on non-inorder mode "
> -                     "(vid %d, qid: %d)\n", vid, queue_id);
> -             return -1;
> -     }
> -
> -     if (unlikely(ops->check_completed_copies == NULL ||
> -             ops->transfer_data == NULL))
> -             return -1;
> -
>       rte_spinlock_lock(&vq->access_lock);
> -     ret = async_channel_register(vid, queue_id, ops);
> +     ret = async_channel_register(vid, queue_id);
>       rte_spinlock_unlock(&vq->access_lock);
> 
>       return ret;
>  }
> 
>  int
> -rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id,
> -             struct rte_vhost_async_config config,
> -             struct rte_vhost_async_channel_ops *ops)
> +rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id)
>  {
>       struct vhost_virtqueue *vq;
>       struct virtio_net *dev = get_device(vid);
> 
> -     if (dev == NULL || ops == NULL)
> +     if (dev == NULL)
>               return -1;
> 
>       if (queue_id >= VHOST_MAX_VRING)
> @@ -1747,18 +1737,7 @@ rte_vhost_async_channel_register_thread_unsafe(int vid,
> uint16_t queue_id,
>       if (unlikely(vq == NULL || !dev->async_copy))
>               return -1;
> 
> -     if (unlikely(!(config.features & RTE_VHOST_ASYNC_INORDER))) {
> -             VHOST_LOG_CONFIG(ERR,
> -                     "async copy is not supported on non-inorder mode "
> -                     "(vid %d, qid: %d)\n", vid, queue_id);
> -             return -1;
> -     }
> -
> -     if (unlikely(ops->check_completed_copies == NULL ||
> -             ops->transfer_data == NULL))
> -             return -1;
> -
> -     return async_channel_register(vid, queue_id, ops);
> +     return async_channel_register(vid, queue_id);
>  }
> 
>  int
> @@ -1835,6 +1814,83 @@ rte_vhost_async_channel_unregister_thread_unsafe(int
> vid, uint16_t queue_id)
>       return 0;
>  }
> 
> +static __rte_always_inline void
> +vhost_free_async_dma_mem(void)
> +{
> +     uint16_t i;
> +
> +     for (i = 0; i < RTE_DMADEV_DEFAULT_MAX; i++) {
> +             struct async_dma_info *dma = &dma_copy_track[i];
> +             int16_t j;
> +
> +             if (dma->max_vchans == 0) {
> +                     continue;
> +             }
> +
> +             for (j = 0; j < dma->max_vchans; j++) {
> +                     rte_free(dma->vchans[j].metadata);
> +             }
> +             rte_free(dma->vchans);
> +             dma->vchans = NULL;
> +             dma->max_vchans = 0;
> +     }
> +}
> +
> +int
> +rte_vhost_async_dma_configure(struct rte_vhost_async_dma_info *dmas, uint16_t
> count)
> +{
> +     uint16_t i;
> +
> +     if (!dmas) {
> +             VHOST_LOG_CONFIG(ERR, "Invalid DMA configuration parameter.\n");
> +             return -1;
> +     }
> +
> +     for (i = 0; i < count; i++) {
> +             struct async_dma_vchan_info *vchans;
> +             int16_t dev_id;
> +             uint16_t max_vchans;
> +             uint16_t max_desc;
> +             uint16_t j;
> +
> +             dev_id = dmas[i].dev_id;
> +             max_vchans = dmas[i].max_vchans;
> +             max_desc = dmas[i].max_desc;
> +
> +             if (!rte_is_power_of_2(max_desc)) {
> +                     max_desc = rte_align32pow2(max_desc);
> +             }

I think when aligning to a power of 2, the result should not exceed max_desc?
And based on the above comment, if this max_desc is really the nb_desc
configured for the vchan, you should just make sure that nb_desc is a power
of 2.
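
I.e. round down rather than up, e.g. (sketch, rte_align32prevpow2() is from
rte_common.h):

        if (!rte_is_power_of_2(max_desc))
                max_desc = rte_align32prevpow2(max_desc);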

> +
> +             vchans = rte_zmalloc(NULL, sizeof(struct async_dma_vchan_info) *
> max_vchans,
> +                             RTE_CACHE_LINE_SIZE);
> +             if (vchans == NULL) {
> +                     VHOST_LOG_CONFIG(ERR, "Failed to allocate vchans for 
> dma-
> %d."
> +                                     " Cannot enable async data-path.\n", 
> dev_id);
> +                     vhost_free_async_dma_mem();
> +                     return -1;
> +             }
> +
> +             for (j = 0; j < max_vchans; j++) {
> +                     vchans[j].metadata = rte_zmalloc(NULL, sizeof(bool *) *
> max_desc,
> +                                     RTE_CACHE_LINE_SIZE);
> +                     if (!vchans[j].metadata) {
> +                             VHOST_LOG_CONFIG(ERR, "Failed to allocate 
> metadata for
> "
> +                                             "dma-%d vchan-%u\n", dev_id, j);
> +                             vhost_free_async_dma_mem();
> +                             return -1;
> +                     }
> +
> +                     vchans[j].ring_size = max_desc;
> +                     vchans[j].ring_mask = max_desc - 1;
> +             }
> +
> +             dma_copy_track[dev_id].vchans = vchans;
> +             dma_copy_track[dev_id].max_vchans = max_vchans;
> +     }
> +
> +     return 0;
> +}
> +
>  int
>  rte_vhost_async_get_inflight(int vid, uint16_t queue_id)
>  {
> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
> index 7085e0885c..d9bda34e11 100644
> --- a/lib/vhost/vhost.h
> +++ b/lib/vhost/vhost.h
> @@ -19,6 +19,7 @@
>  #include <rte_ether.h>
>  #include <rte_rwlock.h>
>  #include <rte_malloc.h>
> +#include <rte_dmadev.h>
> 
>  #include "rte_vhost.h"
>  #include "rte_vdpa.h"
> @@ -50,6 +51,7 @@
> 
>  #define VHOST_MAX_ASYNC_IT (MAX_PKT_BURST)
>  #define VHOST_MAX_ASYNC_VEC 2048
> +#define VHOST_ASYNC_DMA_BATCHING_SIZE 32
> 
>  #define PACKED_DESC_ENQUEUE_USED_FLAG(w)     \
>       ((w) ? (VRING_DESC_F_AVAIL | VRING_DESC_F_USED | VRING_DESC_F_WRITE) : \
> @@ -119,6 +121,41 @@ struct vring_used_elem_packed {
>       uint32_t count;
>  };
> 
> +struct async_dma_vchan_info {
> +     /* circular array to track copy metadata */
> +     bool **metadata;

If the metadata will only ever hold flags, maybe just use a name like
XXX_flag.

> +
> +     /* max elements in 'metadata' */
> +     uint16_t ring_size;
> +     /* ring index mask for 'metadata' */
> +     uint16_t ring_mask;
> +
> +     /* batching copies before a DMA doorbell */
> +     uint16_t nr_batching;
> +
> +     /**
> +      * DMA virtual channel lock. Although it is able to bind DMA
> +      * virtual channels to data plane threads, vhost control plane
> +      * thread could call data plane functions too, thus causing
> +      * DMA device contention.
> +      *
> +      * For example, in VM exit case, vhost control plane thread needs
> +      * to clear in-flight packets before disable vring, but there could
> +      * be anotther data plane thread is enqueuing packets to the same
> +      * vring with the same DMA virtual channel. But dmadev PMD functions
> +      * are lock-free, so the control plane and data plane threads
> +      * could operate the same DMA virtual channel at the same time.
> +      */
> +     rte_spinlock_t dma_lock;
> +};
> +
> +struct async_dma_info {
> +     uint16_t max_vchans;
> +     struct async_dma_vchan_info *vchans;
> +};
> +
> +extern struct async_dma_info dma_copy_track[RTE_DMADEV_DEFAULT_MAX];
> +
>  /**
>   * inflight async packet information
>   */
> @@ -129,9 +166,6 @@ struct async_inflight_info {
>  };
> 
>  struct vhost_async {
> -     /* operation callbacks for DMA */
> -     struct rte_vhost_async_channel_ops ops;
> -
>       struct rte_vhost_iov_iter iov_iter[VHOST_MAX_ASYNC_IT];
>       struct rte_vhost_iovec iovec[VHOST_MAX_ASYNC_VEC];
>       uint16_t iter_idx;
> @@ -139,6 +173,19 @@ struct vhost_async {
> 
>       /* data transfer status */
>       struct async_inflight_info *pkts_info;
> +     /**
> +      * packet reorder array. "true" indicates that DMA
> +      * device completes all copies for the packet.
> +      *
> +      * Note that this array could be written by multiple
> +      * threads at the same time. For example, two threads
> +      * enqueue packets to the same virtqueue with their
> +      * own DMA devices. However, since offloading is
> +      * per-packet basis, each packet flag will only be
> +      * written by one thread. And single byte write is
> +      * atomic, so no lock is needed.
> +      */
> +     bool *pkts_cmpl_flag;
>       uint16_t pkts_idx;
>       uint16_t pkts_inflight_n;
>       union {
> diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
> index b3d954aab4..9f81fc9733 100644
> --- a/lib/vhost/virtio_net.c
> +++ b/lib/vhost/virtio_net.c
> @@ -11,6 +11,7 @@
>  #include <rte_net.h>
>  #include <rte_ether.h>
>  #include <rte_ip.h>
> +#include <rte_dmadev.h>
>  #include <rte_vhost.h>
>  #include <rte_tcp.h>
>  #include <rte_udp.h>
> @@ -25,6 +26,9 @@
> 
>  #define MAX_BATCH_LEN 256
> 
> +/* DMA device copy operation tracking array. */
> +struct async_dma_info dma_copy_track[RTE_DMADEV_DEFAULT_MAX];
> +
>  static  __rte_always_inline bool
>  rxvq_is_mergeable(struct virtio_net *dev)
>  {
> @@ -43,6 +47,108 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t
> nr_vring)
>       return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
>  }
> 
> +static __rte_always_inline uint16_t
> +vhost_async_dma_transfer(struct vhost_virtqueue *vq, int16_t dma_id,
> +             uint16_t vchan, uint16_t head_idx,
> +             struct rte_vhost_iov_iter *pkts, uint16_t nr_pkts)
> +{
> +     struct async_dma_vchan_info *dma_info =
> &dma_copy_track[dma_id].vchans[vchan];
> +     uint16_t ring_mask = dma_info->ring_mask;
> +     uint16_t pkt_idx;
> +
> +     rte_spinlock_lock(&dma_info->dma_lock);
> +
> +     for (pkt_idx = 0; pkt_idx < nr_pkts; pkt_idx++) {
> +             struct rte_vhost_iovec *iov = pkts[pkt_idx].iov;
> +             int copy_idx = 0;
> +             uint16_t nr_segs = pkts[pkt_idx].nr_segs;
> +             uint16_t i;
> +
> +             if (rte_dma_burst_capacity(dma_id, vchan) < nr_segs) {
> +                     goto out;
> +             }
> +
> +             for (i = 0; i < nr_segs; i++) {
> +                     /**
> +                      * We have checked the available space before submit 
> copies
> to DMA
> +                      * vChannel, so we don't handle error here.
> +                      */
> +                     copy_idx = rte_dma_copy(dma_id, vchan,
> (rte_iova_t)iov[i].src_addr,
> +                                     (rte_iova_t)iov[i].dst_addr, iov[i].len,
> +                                     RTE_DMA_OP_FLAG_LLC);

This assumes rte_dma_copy will always succeed if there's available space.

But the API doxygen says:

* @return
 *   - 0..UINT16_MAX: index of enqueued job.
 *   - -ENOSPC: if no space left to enqueue.
 *   - other values < 0 on failure.

So it should also handle other vendor-specific errors, not just -ENOSPC.
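
E.g. something like this (only a sketch; how a partially submitted packet
should then be handled is up to you):

        copy_idx = rte_dma_copy(dma_id, vchan, (rte_iova_t)iov[i].src_addr,
                        (rte_iova_t)iov[i].dst_addr, iov[i].len,
                        RTE_DMA_OP_FLAG_LLC);
        if (unlikely(copy_idx < 0)) {
                /* covers -ENOSPC as well as vendor-specific errors */
                VHOST_LOG_DATA(ERR, "%s: DMA copy failed on dev %d vchan %u\n",
                                __func__, dma_id, vchan);
                goto out;
        }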

Thanks,
Chenbo

> +
> +                     /**
> +                      * Only store packet completion flag address in the last
> copy's
> +                      * slot, and other slots are set to NULL.
> +                      */
> +                     if (unlikely(i == (nr_segs - 1))) {
> +                             dma_info->metadata[copy_idx & ring_mask] =
> +                                     &vq->async->pkts_cmpl_flag[head_idx % 
> vq->size];
> +                     }
> +             }
> +
> +             dma_info->nr_batching += nr_segs;
> +             if (unlikely(dma_info->nr_batching >=
> VHOST_ASYNC_DMA_BATCHING_SIZE)) {
> +                     rte_dma_submit(dma_id, vchan);
> +                     dma_info->nr_batching = 0;
> +             }
> +
> +             head_idx++;
> +     }
> +
> +out:
> +     if (dma_info->nr_batching > 0) {
> +             rte_dma_submit(dma_id, vchan);
> +             dma_info->nr_batching = 0;
> +     }
> +     rte_spinlock_unlock(&dma_info->dma_lock);
> +
> +     return pkt_idx;
> +}
> +
> +static __rte_always_inline uint16_t
> +vhost_async_dma_check_completed(int16_t dma_id, uint16_t vchan, uint16_t
> max_pkts)
> +{
> +     struct async_dma_vchan_info *dma_info =
> &dma_copy_track[dma_id].vchans[vchan];
> +     uint16_t ring_mask = dma_info->ring_mask;
> +     uint16_t last_idx = 0;
> +     uint16_t nr_copies;
> +     uint16_t copy_idx;
> +     uint16_t i;
> +
> +     rte_spinlock_lock(&dma_info->dma_lock);
> +
> +     /**
> +      * Since all memory is pinned and addresses should be valid,
> +      * we don't check errors.
> +      */
> +     nr_copies = rte_dma_completed(dma_id, vchan, max_pkts, &last_idx, NULL);
> +     if (nr_copies == 0) {
> +             goto out;
> +     }
> +
> +     copy_idx = last_idx - nr_copies + 1;
> +     for (i = 0; i < nr_copies; i++) {
> +             bool *flag;
> +
> +             flag = dma_info->metadata[copy_idx & ring_mask];
> +             if (flag) {
> +                     /**
> +                      * Mark the packet flag as received. The flag
> +                      * could belong to another virtqueue but write
> +                      * is atomic.
> +                      */
> +                     *flag = true;
> +                     dma_info->metadata[copy_idx & ring_mask] = NULL;
> +             }
> +             copy_idx++;
> +     }
> +
> +out:
> +     rte_spinlock_unlock(&dma_info->dma_lock);
> +     return nr_copies;
> +}
> +
>  static inline void
>  do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
>  {
> @@ -1449,9 +1555,9 @@ store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring,
>  }
> 
>  static __rte_noinline uint32_t
> -virtio_dev_rx_async_submit_split(struct virtio_net *dev,
> -     struct vhost_virtqueue *vq, uint16_t queue_id,
> -     struct rte_mbuf **pkts, uint32_t count)
> +virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +             uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count,
> +             int16_t dma_id, uint16_t vchan)
>  {
>       struct buf_vector buf_vec[BUF_VECTOR_MAX];
>       uint32_t pkt_idx = 0;
> @@ -1503,17 +1609,16 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
>       if (unlikely(pkt_idx == 0))
>               return 0;
> 
> -     n_xfer = async->ops.transfer_data(dev->vid, queue_id, async->iov_iter, 0, pkt_idx);
> -     if (unlikely(n_xfer < 0)) {
> -             VHOST_LOG_DATA(ERR, "(%d) %s: failed to transfer data for queue id %d.\n",
> -                             dev->vid, __func__, queue_id);
> -             n_xfer = 0;
> -     }
> +     n_xfer = vhost_async_dma_transfer(vq, dma_id, vchan, async->pkts_idx, async->iov_iter,
> +                     pkt_idx);
> 
>       pkt_err = pkt_idx - n_xfer;
>       if (unlikely(pkt_err)) {
>               uint16_t num_descs = 0;
> 
> +             VHOST_LOG_DATA(DEBUG, "(%d) %s: failed to transfer %u packets for queue %u.\n",
> +                             dev->vid, __func__, pkt_err, queue_id);
> +
>               /* update number of completed packets */
>               pkt_idx = n_xfer;
> 
> @@ -1656,13 +1761,13 @@ dma_error_handler_packed(struct vhost_virtqueue *vq, uint16_t slot_idx,
>  }
> 
>  static __rte_noinline uint32_t
> -virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
> -     struct vhost_virtqueue *vq, uint16_t queue_id,
> -     struct rte_mbuf **pkts, uint32_t count)
> +virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +             uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count,
> +             int16_t dma_id, uint16_t vchan)
>  {
>       uint32_t pkt_idx = 0;
>       uint32_t remained = count;
> -     int32_t n_xfer;
> +     uint16_t n_xfer;
>       uint16_t num_buffers;
>       uint16_t num_descs;
> 
> @@ -1670,6 +1775,7 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
>       struct async_inflight_info *pkts_info = async->pkts_info;
>       uint32_t pkt_err = 0;
>       uint16_t slot_idx = 0;
> +     uint16_t head_idx = async->pkts_idx % vq->size;
> 
>       do {
>               rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
> @@ -1694,19 +1800,17 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
>       if (unlikely(pkt_idx == 0))
>               return 0;
> 
> -     n_xfer = async->ops.transfer_data(dev->vid, queue_id, async->iov_iter, 0, pkt_idx);
> -     if (unlikely(n_xfer < 0)) {
> -             VHOST_LOG_DATA(ERR, "(%d) %s: failed to transfer data for queue id %d.\n",
> -                             dev->vid, __func__, queue_id);
> -             n_xfer = 0;
> -     }
> -
> -     pkt_err = pkt_idx - n_xfer;
> +     n_xfer = vhost_async_dma_transfer(vq, dma_id, vchan, head_idx,
> +                     async->iov_iter, pkt_idx);
> 
>       async_iter_reset(async);
> 
> -     if (unlikely(pkt_err))
> +     pkt_err = pkt_idx - n_xfer;
> +     if (unlikely(pkt_err)) {
> +             VHOST_LOG_DATA(DEBUG, "(%d) %s: failed to transfer %u packets for queue %u.\n",
> +                             dev->vid, __func__, pkt_err, queue_id);
>               dma_error_handler_packed(vq, slot_idx, pkt_err, &pkt_idx);
> +     }
> 
>       if (likely(vq->shadow_used_idx)) {
>               /* keep used descriptors. */
> @@ -1826,28 +1930,37 @@ write_back_completed_descs_packed(struct vhost_virtqueue *vq,
> 
>  static __rte_always_inline uint16_t
>  vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
> -             struct rte_mbuf **pkts, uint16_t count)
> +             struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
> +             uint16_t vchan)
>  {
>       struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
>       struct vhost_async *async = vq->async;
>       struct async_inflight_info *pkts_info = async->pkts_info;
> -     int32_t n_cpl;
> +     uint16_t nr_cpl_pkts = 0;
>       uint16_t n_descs = 0, n_buffers = 0;
>       uint16_t start_idx, from, i;
> 
> -     n_cpl = async->ops.check_completed_copies(dev->vid, queue_id, 0, count);
> -     if (unlikely(n_cpl < 0)) {
> -             VHOST_LOG_DATA(ERR, "(%d) %s: failed to check completed copies for queue id %d.\n",
> -                             dev->vid, __func__, queue_id);
> -             return 0;
> -     }
> -
> -     if (n_cpl == 0)
> -             return 0;
> +     /* Check completed copies for the given DMA vChannel */
> +     vhost_async_dma_check_completed(dma_id, vchan, count);
> 
>       start_idx = async_get_first_inflight_pkt_idx(vq);
> 
> -     for (i = 0; i < n_cpl; i++) {
> +     /**
> +      * Calculate the number of copy completed packets.
> +      * Note that there may be completed packets even if
> +      * no copies are reported done by the given DMA vChannel,
> +      * as DMA vChannels could be shared by other threads.
> +      */
> +     from = start_idx;
> +     while (vq->async->pkts_cmpl_flag[from] && count--) {
> +             vq->async->pkts_cmpl_flag[from] = false;
> +             from++;
> +             if (from >= vq->size)
> +                     from -= vq->size;
> +             nr_cpl_pkts++;
> +     }
> +
> +     for (i = 0; i < nr_cpl_pkts; i++) {
>               from = (start_idx + i) % vq->size;
>               /* Only used with packed ring */
>               n_buffers += pkts_info[from].nr_buffers;
> @@ -1856,7 +1969,7 @@ vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
>               pkts[i] = pkts_info[from].mbuf;
>       }
> 
> -     async->pkts_inflight_n -= n_cpl;
> +     async->pkts_inflight_n -= nr_cpl_pkts;
> 
>       if (likely(vq->enabled && vq->access_ok)) {
>               if (vq_is_packed(dev)) {
> @@ -1877,12 +1990,13 @@ vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
>               }
>       }
> 
> -     return n_cpl;
> +     return nr_cpl_pkts;
>  }
> 
>  uint16_t
>  rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> -             struct rte_mbuf **pkts, uint16_t count)
> +             struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
> +             uint16_t vchan)
>  {
>       struct virtio_net *dev = get_device(vid);
>       struct vhost_virtqueue *vq;
> @@ -1908,7 +2022,7 @@ rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> 
>       rte_spinlock_lock(&vq->access_lock);
> 
> -     n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
> +     n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count,
> dma_id, vchan);
> 
>       rte_spinlock_unlock(&vq->access_lock);
> 
> @@ -1917,7 +2031,8 @@ rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> 
>  uint16_t
>  rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
> -             struct rte_mbuf **pkts, uint16_t count)
> +             struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
> +             uint16_t vchan)
>  {
>       struct virtio_net *dev = get_device(vid);
>       struct vhost_virtqueue *vq;
> @@ -1941,14 +2056,14 @@ rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
>               return 0;
>       }
> 
> -     n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
> +     n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count, dma_id, vchan);
> 
>       return n_pkts_cpl;
>  }
> 
>  static __rte_always_inline uint32_t
>  virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
> -     struct rte_mbuf **pkts, uint32_t count)
> +     struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan)
>  {
>       struct vhost_virtqueue *vq;
>       uint32_t nb_tx = 0;
> @@ -1980,10 +2095,10 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
> 
>       if (vq_is_packed(dev))
>               nb_tx = virtio_dev_rx_async_submit_packed(dev, vq, queue_id,
> -                             pkts, count);
> +                             pkts, count, dma_id, vchan);
>       else
>               nb_tx = virtio_dev_rx_async_submit_split(dev, vq, queue_id,
> -                             pkts, count);
> +                             pkts, count, dma_id, vchan);
> 
>  out:
>       if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> @@ -1997,7 +2112,8 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
> 
>  uint16_t
>  rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,
> -             struct rte_mbuf **pkts, uint16_t count)
> +             struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
> +             uint16_t vchan)
>  {
>       struct virtio_net *dev = get_device(vid);
> 
> @@ -2011,7 +2127,7 @@ rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,
>               return 0;
>       }
> 
> -     return virtio_dev_rx_async_submit(dev, queue_id, pkts, count);
> +     return virtio_dev_rx_async_submit(dev, queue_id, pkts, count, dma_id, vchan);
>  }
> 
>  static inline bool
> --
> 2.25.1
