Hi Chenbo, > -----Original Message----- > From: Xia, Chenbo <chenbo....@intel.com> > Sent: Tuesday, June 23, 2020 9:54 AM > To: Jiang, Cheng1 <cheng1.ji...@intel.com>; maxime.coque...@redhat.com; > Wang, Zhihong <zhihong.w...@intel.com> > Cc: dev@dpdk.org; Fu, Patrick <patrick...@intel.com>; Liang, Cunming > <cunming.li...@intel.com> > Subject: RE: [RFC] example/vhost: add support for vhost async data path > > Hi Cheng, > > > -----Original Message----- > > From: Jiang, Cheng1 <cheng1.ji...@intel.com> > > Sent: Monday, June 22, 2020 10:59 AM > > To: maxime.coque...@redhat.com; Xia, Chenbo <chenbo....@intel.com>; > > Wang, Zhihong <zhihong.w...@intel.com> > > Cc: dev@dpdk.org; Fu, Patrick <patrick...@intel.com>; Liang, Cunming > > <cunming.li...@intel.com>; Jiang, Cheng1 <cheng1.ji...@intel.com> > > Subject: [RFC] example/vhost: add support for vhost async data path > > > > This patch makes vhost-vswitch be able to use vhost asynchronous api > > for enqueue operations. Demonstrated how the application leverage IOAT > > DMA channel with vhost async api. Since this is an early preview > > patch, the performance has not been fully optimized and it's not > > suggested to use this patch as a tool for benchmark. > > > > We introduce two parameters to enable DMA acceleration for Tx > > operations of > > queues: > > –async_vhost_driver Async vhost-user net driver which demonstrates how > > to use the async vhost APIs will be used when this option is given. It > > is disabled by default. > > > > -dmas This parameter is used to specify the assigned DMA device of a > queue. 
> > > > This patch depends on following patch set: > > http://patches.dpdk.org/cover/71265/ > > > > Signed-off-by: Cheng Jiang <cheng1.ji...@intel.com> > > --- > > examples/vhost/main.c | 246 > > +++++++++++++++++++++++++++++++++++++++++- > > examples/vhost/main.h | 1 + > > 2 files changed, 243 insertions(+), 4 deletions(-) > > > > diff --git a/examples/vhost/main.c b/examples/vhost/main.c index > > ab649bf14..46dd282e0 100644 > > --- a/examples/vhost/main.c > > +++ b/examples/vhost/main.c > > @@ -24,11 +24,15 @@ > > #include <rte_ip.h> > > #include <rte_tcp.h> > > #include <rte_pause.h> > > +#include <rte_vhost_async.h> > > +#include <rte_rawdev.h> > > +#include <rte_ioat_rawdev.h> > > +#include <rte_pci.h> > > > > #include "main.h" > > > > #ifndef MAX_QUEUES > > -#define MAX_QUEUES 128 > > +#define MAX_QUEUES 512 > > #endif > > > > /* the maximum number of external ports supported */ @@ -58,6 +62,10 > > @@ > > /* Maximum long option length for option parsing. */ #define > > MAX_LONG_OPT_SZ 64 > > > > +#define IOAT_RING_SIZE 4096 > > + > > +#define MAX_ENQUEUED_SIZE 2048 > > + > > /* mask of enabled ports */ > > static uint32_t enabled_port_mask = 0; > > > > @@ -96,6 +104,20 @@ static int dequeue_zero_copy; > > > > static int builtin_net_driver; > > > > +static int async_vhost_driver; > > + > > +struct dma_info { > > + struct rte_pci_addr addr; > > + uint16_t dev_id; > > + bool is_valid; > > +}; > > + > > +struct dma_info_input { > > + struct dma_info dmas[RTE_MAX_QUEUES_PER_PORT * 2]; > > + uint16_t nr; > > +}; > > + > > +static struct dma_info_input dma_bind[20]; > > Should '20' be MAX_VHOST_DEVICE as this is indexed by vid? > Sure, you are right, MAX_VHOST_DEVICE will be used in the next version.
> > /* Specify timeout (in useconds) between retries on RX. */ static > > uint32_t burst_rx_delay_time = BURST_RX_WAIT_US; > > /* Specify the number of retries on RX. */ @@ -141,6 +163,61 @@ > > static struct rte_eth_conf vmdq_conf_default = { > > }, > > }; > > > > +static int > > +ioat_transfer_data_cb(int vid, uint16_t queue_id, struct > > +dma_trans_desc > > *descs, > > + struct dma_trans_status *opaque_data, uint16_t count) { > > + int ret; > > + uint16_t i_desc; > > + > > + struct iov_it *src = NULL; > > + struct iov_it *dst = NULL; > > + unsigned long i_seg; > > + > > + int dev_id = dma_bind[vid].dmas[queue_id * 2 + > VIRTIO_RXQ].dev_id; > > + if (likely(!opaque_data)) { > > + for (i_desc = 0; i_desc < count; i_desc++) { > > + src = descs[i_desc].src; > > + dst = descs[i_desc].dst; > > + i_seg = 0; > > + while (i_seg < src->nr_segs) { > > + ret = rte_ioat_enqueue_copy(dev_id, > > + (uintptr_t)(src->iov[i_seg].iov_base) > > + + src->offset, > > + (uintptr_t)(dst->iov[i_seg].iov_base) > > + + dst->offset, > > + src->iov[i_seg].iov_len, > > + 0, > > + 0, > > + 0); > > + if (ret != 1) > > + break; > > + i_seg++; > > + } > > + } > > + } else { > > + /* Opaque data is not supported */ > > + return -1; > > + } > > + /* ring the doolbell */ > > s/doolbell/doorbell > I'll fix it in the next version. 
> > + rte_ioat_do_copies(dev_id); > > + return i_desc; > > +} > > + > > +static int > > +ioat_check_completed_copies_cb(int vid, uint16_t queue_id, > > + struct dma_trans_status *opaque_data, > > + uint16_t max_packets __rte_unused) > > +{ > > + if (!opaque_data) { > > + uintptr_t dump[255]; > > + return > > rte_ioat_completed_copies(dma_bind[vid].dmas[queue_id * 2 > > + + VIRTIO_RXQ].dev_id, 255, dump, dump); > > + } else { > > + /* Opaque data is not supported */ > > + return -1; > > + } > > +} > > > > static unsigned lcore_ids[RTE_MAX_LCORE]; static uint16_t > > ports[RTE_MAX_ETHPORTS]; @@ -186,6 +263,94 @@ struct mbuf_table > > lcore_tx_queue[RTE_MAX_LCORE]; > > * Builds up the correct configuration for VMDQ VLAN pool map > > * according to the pool & queue limits. > > */ > > + > > +static inline int > > +open_dma(const char *value, void *dma_bind_info) { > > + struct dma_info_input *dma_info = dma_bind_info; > > + char *input = strndup(value, strlen(value) + 1); > > + char *addrs = input; > > + char *ptrs[2]; > > + char *start, *end, *substr; > > + int64_t qid, vring_id; > > + struct rte_ioat_rawdev_config config; > > + struct rte_rawdev_info info = { .dev_private = &config }; > > + char name[32]; > > + int dev_id; > > + int ret = 0; > > + > > + while (isblank(*addrs)) > > + addrs++; > > + if (*addrs == '\0') { > > + ret = -1; > > + goto out; > > + } > > + > > + /* process DMA devices within bracket. 
*/ > > + addrs++; > > + substr = strtok(addrs, ";]"); > > + if (!substr) { > > + ret = -1; > > + goto out; > > + } > > + > > + do { > > + rte_strsplit(substr, strlen(substr), ptrs, 2, '@'); > > + > > + start = strstr(ptrs[0], "txq"); > > + if (start == NULL) { > > + ret = -1; > > + goto out; > > + } > > + > > + start += 3; > > + qid = strtol(start, &end, 0); > > + if (end == start) { > > + ret = -1; > > + goto out; > > + } > > + > > + vring_id = qid * 2 + VIRTIO_RXQ; > > + if (rte_pci_addr_parse(ptrs[1], > > + &dma_info->dmas[vring_id].addr) < 0) { > > + ret = -1; > > + goto out; > > + } > > + > > + rte_pci_device_name(&dma_info->dmas[vring_id].addr, > > + name, sizeof(name)); > > + dev_id = rte_rawdev_get_dev_id(name); > > + if (dev_id == (uint16_t)(-ENODEV) || > > + dev_id == (uint16_t)(-EINVAL)) { > > + ret = -1; > > + goto out; > > + } > > + > > + if (rte_rawdev_info_get(dev_id, &info) < 0 || > > + strstr(info.driver_name, "ioat") == NULL) { > > + ret = -1; > > + goto out; > > + } > > + > > + dma_info->dmas[vring_id].dev_id = dev_id; > > + dma_info->dmas[vring_id].is_valid = true; > > + config.ring_size = IOAT_RING_SIZE; > > + if (rte_rawdev_configure(dev_id, &info) < 0) { > > + ret = -1; > > + goto out; > > + } > > + rte_rawdev_start(dev_id); > > + > > + dma_info->nr++; > > + > > + substr = strtok(NULL, ";]"); > > + } while (substr); > > + > > +out: > > + free(input); > > + return ret; > > +} > > + > > static inline int > > get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices) { > > @@ - > > 488,6 +653,8 @@ us_vhost_parse_args(int argc, char **argv) > > {"client", no_argument, &client_mode, 1}, > > {"dequeue-zero-copy", no_argument, > &dequeue_zero_copy, 1}, > > {"builtin-net-driver", no_argument, &builtin_net_driver, 1}, > > + {"async_vhost_driver", no_argument, &async_vhost_driver, > 1}, > > + {"dmas", required_argument, NULL, 0}, > > {NULL, 0, 0, 0}, > > }; > > > > @@ -623,13 +790,25 @@ us_vhost_parse_args(int argc, char **argv) > > 
"socket-file", > > MAX_LONG_OPT_SZ)) { > > if (us_vhost_parse_socket_path(optarg) == - > 1) { > > RTE_LOG(INFO, VHOST_CONFIG, > > - "Invalid argument for socket name > > (Max %d characters)\n", > > - PATH_MAX); > > + "Invalid argument for socket > > name (Max %d characters)\n", > > + PATH_MAX); > > us_vhost_usage(prgname); > > return -1; > > } > > } > > > > + if (!strncmp(long_option[option_index].name, > > + "dmas", > MAX_LONG_OPT_SZ)) > > { > > + if (open_dma(optarg, &(dma_bind[0])) == -1) > { > > + if (*optarg == -1) { > > + RTE_LOG(INFO, > > VHOST_CONFIG, > > + "Wrong DMA args\n"); > > + us_vhost_usage(prgname); > > + return -1; > > + } > > + } > > + } > > + > > break; > > > > /* Invalid option - print options. */ @@ -785,9 > +964,26 @@ > > virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev, > > struct rte_mbuf *m) > > { > > uint16_t ret; > > + struct rte_mbuf *m_cpl[1]; > > > > if (builtin_net_driver) { > > ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1); > > + } else if (async_vhost_driver) { > > + ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, > > VIRTIO_RXQ, > > + &m, 1); > > + > > + if (likely(ret)) { > > + dst_vdev->nr_async_pkts++; > > + rte_mbuf_refcnt_update(m, 1); > > + } > > + > > + while (likely(dst_vdev->nr_async_pkts)) { > > + dst_vdev->nr_async_pkts = > > + > rte_vhost_poll_enqueue_completed(dst_vdev- > > >vid, > > + VIRTIO_RXQ, m_cpl, 1); > > I think nr_async_pkts should be changed only when > rte_vhost_poll_enqueue_completed succeeds? > > Thanks! > Chenbo > Yes, that makes sense, I'll update it in the next version. 
Thanks, Cheng > > + dst_vdev->nr_async_pkts--; > > + rte_pktmbuf_free(*m_cpl); > > + } > > } else { > > ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, > &m, 1); > > } > > @@ -1036,6 +1232,19 @@ drain_mbuf_table(struct mbuf_table *tx_q) > > } > > } > > > > +static __rte_always_inline void > > +complete_async_pkts(struct vhost_dev *vdev, uint16_t qid) { > > + struct rte_mbuf *p_cpl[MAX_PKT_BURST]; > > + uint16_t complete_count; > > + > > + complete_count = rte_vhost_poll_enqueue_completed(vdev->vid, > > + qid, p_cpl, MAX_PKT_BURST); > > + vdev->nr_async_pkts -= complete_count; > > + if (complete_count) > > + free_pkts(p_cpl, complete_count); > > +} > > + > > static __rte_always_inline void > > drain_eth_rx(struct vhost_dev *vdev) > > { > > @@ -1044,6 +1253,10 @@ drain_eth_rx(struct vhost_dev *vdev) > > > > rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q, > > pkts, MAX_PKT_BURST); > > + > > + while (likely(vdev->nr_async_pkts)) > > + complete_async_pkts(vdev, VIRTIO_RXQ); > > + > > if (!rx_count) > > return; > > > > @@ -1068,16 +1281,22 @@ drain_eth_rx(struct vhost_dev *vdev) > > if (builtin_net_driver) { > > enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ, > > pkts, rx_count); > > + } else if (async_vhost_driver) { > > + enqueue_count = rte_vhost_submit_enqueue_burst(vdev- > >vid, > > + VIRTIO_RXQ, pkts, rx_count); > > + vdev->nr_async_pkts += enqueue_count; > > } else { > > enqueue_count = rte_vhost_enqueue_burst(vdev->vid, > > VIRTIO_RXQ, > > pkts, rx_count); > > } > > + > > if (enable_stats) { > > rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count); > > rte_atomic64_add(&vdev->stats.rx_atomic, > enqueue_count); > > } > > > > - free_pkts(pkts, rx_count); > > + if (!async_vhost_driver) > > + free_pkts(pkts, rx_count); > > } > > > > static __rte_always_inline void > > @@ -1224,6 +1443,9 @@ destroy_device(int vid) > > "(%d) device has been removed from data core\n", > > vdev->vid); > > > > + if (async_vhost_driver) > > + 
rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ); > > + > > rte_free(vdev); > > } > > > > @@ -1238,6 +1460,12 @@ new_device(int vid) > > uint32_t device_num_min = num_devices; > > struct vhost_dev *vdev; > > > > + struct rte_vhost_async_channel_ops channel_ops = { > > + .transfer_data = ioat_transfer_data_cb, > > + .check_completed_copies = > ioat_check_completed_copies_cb > > + }; > > + struct dma_channel_features f; > > + > > vdev = rte_zmalloc("vhost device", sizeof(*vdev), > > RTE_CACHE_LINE_SIZE); > > if (vdev == NULL) { > > RTE_LOG(INFO, VHOST_DATA, > > @@ -1278,6 +1506,13 @@ new_device(int vid) > > "(%d) device has been added to data core %d\n", > > vid, vdev->coreid); > > > > + if (async_vhost_driver) { > > + f.inorder = 1; > > + f.threshold = 256; > > + return rte_vhost_async_channel_register(vid, VIRTIO_RXQ, > > + f.intval, &channel_ops); > > + } > > + > > return 0; > > } > > > > @@ -1517,6 +1752,9 @@ main(int argc, char *argv[]) > > /* Register vhost user driver to handle vhost messages. */ > > for (i = 0; i < nb_sockets; i++) { > > char *file = socket_files + i * PATH_MAX; > > + if (async_vhost_driver) > > + flags = flags | RTE_VHOST_USER_ASYNC_COPY; > > + > > ret = rte_vhost_driver_register(file, flags); > > if (ret != 0) { > > unregister_drivers(i); > > diff --git a/examples/vhost/main.h b/examples/vhost/main.h index > > 7cba0edbf..4317b6ae8 100644 > > --- a/examples/vhost/main.h > > +++ b/examples/vhost/main.h > > @@ -51,6 +51,7 @@ struct vhost_dev { > > uint64_t features; > > size_t hdr_len; > > uint16_t nr_vrings; > > + uint16_t nr_async_pkts; > > struct rte_vhost_memory *mem; > > struct device_statistics stats; > > TAILQ_ENTRY(vhost_dev) global_vdev_entry; > > -- > > 2.26.2