Hi Maxime,

> -----Original Message-----
> From: Maxime Coquelin <mcoqu...@redhat.com>
> Sent: Monday, June 7, 2021 9:46 PM
> To: Jiang, Cheng1 <cheng1.ji...@intel.com>; maxime.coque...@redhat.com; Xia, Chenbo <chenbo....@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu...@intel.com>; Yang, YvonneX <yvonnex.y...@intel.com>
> Subject: Re: [PATCH 1/2] vhost: add unsafe API to drain pkts in async vhost
>
>
>
> On 6/2/21 6:28 AM, Cheng Jiang wrote:
> > Applications need to stop DMA transfers and finish all the in-flight
> > pkts when in VM memory hot-plug case and async vhost is used. This
> > patch is to provide an unsafe API to drain in-flight pkts which are
> > submitted to DMA engine in vhost async data path. And enable it in
> > vhost example.
> >
> > Signed-off-by: Cheng Jiang <cheng1.ji...@intel.com>
> > ---
> >  examples/vhost/main.c       | 48 +++++++++++++++++++-
> >  examples/vhost/main.h       |  1 +
> >  lib/vhost/rte_vhost_async.h | 22 +++++++++
> >  lib/vhost/version.map       |  3 ++
> >  lib/vhost/virtio_net.c      | 90 +++++++++++++++++++++++++++----------
> >  5 files changed, 139 insertions(+), 25 deletions(-)
>
> Please split example and lib changes in dedicated patches.
Sure, it will be fixed in the next version.

Thanks,
Cheng

>
> >
> > diff --git a/examples/vhost/main.c b/examples/vhost/main.c
> > index d2179eadb9..70bb67c7f8 100644
> > --- a/examples/vhost/main.c
> > +++ b/examples/vhost/main.c
> > @@ -851,8 +851,11 @@ complete_async_pkts(struct vhost_dev *vdev)
> >
> >  	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
> >  					VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
> > -	if (complete_count)
> > +	if (complete_count) {
> >  		free_pkts(p_cpl, complete_count);
> > +		__atomic_sub_fetch(&vdev->pkts_inflight, complete_count, __ATOMIC_SEQ_CST);
> > +	}
> > +
> >  }
> >
> >  static __rte_always_inline void
> > @@ -895,6 +898,7 @@ drain_vhost(struct vhost_dev *vdev)
> >  		complete_async_pkts(vdev);
> >  		ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
> >  						m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
> > +		__atomic_add_fetch(&vdev->pkts_inflight, ret - cpu_cpl_nr, __ATOMIC_SEQ_CST);
> >
> >  		if (cpu_cpl_nr)
> >  			free_pkts(m_cpu_cpl, cpu_cpl_nr);
> > @@ -1226,6 +1230,9 @@ drain_eth_rx(struct vhost_dev *vdev)
> >  		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
> >  					VIRTIO_RXQ, pkts, rx_count,
> >  					m_cpu_cpl, &cpu_cpl_nr);
> > +		__atomic_add_fetch(&vdev->pkts_inflight, enqueue_count - cpu_cpl_nr,
> > +					__ATOMIC_SEQ_CST);
> > +
> >  		if (cpu_cpl_nr)
> >  			free_pkts(m_cpu_cpl, cpu_cpl_nr);
> >
> > @@ -1397,8 +1404,15 @@ destroy_device(int vid)
> >  		"(%d) device has been removed from data core\n",
> >  		vdev->vid);
> >
> > -	if (async_vhost_driver)
> > +	if (async_vhost_driver) {
> > +		uint16_t n_pkt = 0;
> > +		struct rte_mbuf *m_cpl[vdev->pkts_inflight];
> > +		n_pkt = rte_vhost_drain_queue_thread_unsafe(vid, VIRTIO_RXQ, m_cpl,
> > +					vdev->pkts_inflight);
> > +
> > +		free_pkts(m_cpl, n_pkt);
> >  		rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
> > +	}
> >
> >  	rte_free(vdev);
> >  }
> > @@ -1487,6 +1501,35 @@ new_device(int vid)
> >  	return 0;
> >  }
> >
> > +static int
> > +vring_state_changed(int vid, uint16_t queue_id, int enable) {
> > +	struct vhost_dev *vdev = NULL;
> > +
> > +	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
> > +		if (vdev->vid == vid)
> > +			break;
> > +	}
> > +	if (!vdev)
> > +		return -1;
> > +
> > +	if (queue_id != VIRTIO_RXQ)
> > +		return 0;
> > +
> > +	if (async_vhost_driver) {
> > +		if (!enable) {
> > +			uint16_t n_pkt;
> > +			struct rte_mbuf *m_cpl[vdev->pkts_inflight];
> > +
> > +			n_pkt = rte_vhost_drain_queue_thread_unsafe(vid, queue_id,
> > +						m_cpl, vdev->pkts_inflight);
> > +			free_pkts(m_cpl, n_pkt);
> > +		}
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >  /*
> >   * These callback allow devices to be added to the data core when configuration
> >   * has been fully complete.
> > @@ -1495,6 +1538,7 @@ static const struct vhost_device_ops
> >  virtio_net_device_ops = {
> >  	.new_device = new_device,
> >  	.destroy_device = destroy_device,
> > +	.vring_state_changed = vring_state_changed,
> >  };
> >
> >  /*
> > diff --git a/examples/vhost/main.h b/examples/vhost/main.h
> > index 0ccdce4b4a..e7b1ac60a6 100644
> > --- a/examples/vhost/main.h
> > +++ b/examples/vhost/main.h
> > @@ -51,6 +51,7 @@ struct vhost_dev {
> >  	uint64_t features;
> >  	size_t hdr_len;
> >  	uint16_t nr_vrings;
> > +	uint16_t pkts_inflight;
> >  	struct rte_vhost_memory *mem;
> >  	struct device_statistics stats;
> >  	TAILQ_ENTRY(vhost_dev) global_vdev_entry;
> > diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
> > index 6faa31f5ad..041f40cf04 100644
> > --- a/lib/vhost/rte_vhost_async.h
> > +++ b/lib/vhost/rte_vhost_async.h
> > @@ -193,4 +193,26 @@ __rte_experimental
> >  uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> >  	struct rte_mbuf **pkts, uint16_t count);
> >
> > +/**
> > + * This function checks async completion status and empty all pakcets
> > + * for a specific vhost device queue. Packets which are inflight will
> > + * be returned in an array.
> > + *
> > + * @note This function does not perform any locking
> > + *
> > + * @param vid
> > + *  id of vhost device to enqueue data
> > + * @param queue_id
> > + *  queue id to enqueue data
> > + * @param pkts
> > + *  blank array to get return packet pointer
> > + * @param count
> > + *  size of the packet array
> > + * @return
> > + *  num of packets returned
> > + */
> > +__rte_experimental
> > +uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t queue_id,
> > +	struct rte_mbuf **pkts, uint16_t count);
> > +
> >  #endif /* _RTE_VHOST_ASYNC_H_ */
> > diff --git a/lib/vhost/version.map b/lib/vhost/version.map
> > index 9103a23cd4..f480f188af 100644
> > --- a/lib/vhost/version.map
> > +++ b/lib/vhost/version.map
> > @@ -79,4 +79,7 @@ EXPERIMENTAL {
> >
> >  	# added in 21.05
> >  	rte_vhost_get_negotiated_protocol_features;
> > +
> > +	# added in 21.08
> > +	rte_vhost_drain_queue_thread_unsafe;
> >  };
> > diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
> > index 8da8a86a10..793510974a 100644
> > --- a/lib/vhost/virtio_net.c
> > +++ b/lib/vhost/virtio_net.c
> > @@ -2082,36 +2082,18 @@ write_back_completed_descs_packed(struct vhost_virtqueue *vq,
> >  	} while (nr_left > 0);
> >  }
> >
> > -uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> > +static __rte_always_inline uint16_t
> > +vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
> >  	struct rte_mbuf **pkts, uint16_t count) {
> > -	struct virtio_net *dev = get_device(vid);
> >  	struct vhost_virtqueue *vq;
> >  	uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
> >  	uint16_t start_idx, pkts_idx, vq_size;
> >  	struct async_inflight_info *pkts_info;
> >  	uint16_t from, i;
> >
> > -	if (!dev)
> > -		return 0;
> > -
> > -	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
> > -	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
> > -		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
> > -			dev->vid, __func__, queue_id);
> > -		return 0;
> > -	}
> > -
> >  	vq = dev->virtqueue[queue_id];
> >
> > -	if (unlikely(!vq->async_registered)) {
> > -		VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
> > -			dev->vid, __func__, queue_id);
> > -		return 0;
> > -	}
> > -
> > -	rte_spinlock_lock(&vq->access_lock);
> > -
> >  	pkts_idx = vq->async_pkts_idx % vq->size;
> >  	pkts_info = vq->async_pkts_info;
> >  	vq_size = vq->size;
> > @@ -2119,14 +2101,14 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> >  		vq_size, vq->async_pkts_inflight_n);
> >
> >  	if (count > vq->async_last_pkts_n)
> > -		n_pkts_cpl = vq->async_ops.check_completed_copies(vid,
> > +		n_pkts_cpl = vq->async_ops.check_completed_copies(dev->vid,
> >  			queue_id, 0, count - vq->async_last_pkts_n);
> >  	n_pkts_cpl += vq->async_last_pkts_n;
> >
> >  	n_pkts_put = RTE_MIN(count, n_pkts_cpl);
> >  	if (unlikely(n_pkts_put == 0)) {
> >  		vq->async_last_pkts_n = n_pkts_cpl;
> > -		goto done;
> > +		return 0;
> >  	}
> >
> >  	if (vq_is_packed(dev)) {
> > @@ -2165,12 +2147,74 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> >  		vq->last_async_desc_idx_split += n_descs;
> >  	}
> >
> > -done:
> > +	return n_pkts_put;
> > +}
> > +
> > +uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> > +	struct rte_mbuf **pkts, uint16_t count) {
> > +	struct virtio_net *dev = get_device(vid);
> > +	struct vhost_virtqueue *vq;
> > +	uint16_t n_pkts_put = 0;
> > +
> > +	if (!dev)
> > +		return 0;
> > +
> > +	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
> > +	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
> > +		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
> > +			dev->vid, __func__, queue_id);
> > +		return 0;
> > +	}
> > +
> > +	vq = dev->virtqueue[queue_id];
> > +
> > +	if (unlikely(!vq->async_registered)) {
> > +		VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
> > +			dev->vid, __func__, queue_id);
> > +		return 0;
> > +	}
> > +
> > +	rte_spinlock_lock(&vq->access_lock);
> > +
> > +	n_pkts_put = vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
> > +
> >  	rte_spinlock_unlock(&vq->access_lock);
> >
> >  	return n_pkts_put;
> >  }
> >
> > +uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t queue_id,
> > +	struct rte_mbuf **pkts, uint16_t count) {
> > +	struct virtio_net *dev = get_device(vid);
> > +	struct vhost_virtqueue *vq;
> > +	uint16_t n_pkts = count;
> > +
> > +	if (!dev)
> > +		return 0;
> > +
> > +	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
> > +	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
> > +		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
> > +			dev->vid, __func__, queue_id);
> > +		return 0;
> > +	}
> > +
> > +	vq = dev->virtqueue[queue_id];
> > +
> > +	if (unlikely(!vq->async_registered)) {
> > +		VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
> > +			dev->vid, __func__, queue_id);
> > +		return 0;
> > +	}
> > +
> > +	while (count)
> > +		count -= vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
> > +
> > +	return n_pkts;
> > +}
> > +
> >  static __rte_always_inline uint32_t
> >  virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
> >  	struct rte_mbuf **pkts, uint32_t count,
> >
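
For reference, below is a minimal sketch (not part of the patch) of how an application could call the proposed rte_vhost_drain_queue_thread_unsafe() from its vring_state_changed callback, along the lines of the example changes above. The pkts_inflight counter and the free_pkts() helper are assumed to be application-maintained, as in examples/vhost/main.c, and queue 0 is assumed to be the Rx virtqueue; names prefixed with app_ are illustrative only.

#include <stdint.h>
#include <rte_mbuf.h>
#include <rte_vhost.h>
#include <rte_vhost_async.h>

/* Application-maintained count of packets still owned by the DMA engine
 * (incremented on submit, decremented on poll completion), mirroring the
 * pkts_inflight field added to struct vhost_dev in the example app. */
extern uint16_t app_pkts_inflight;

/* Application helper that returns mbufs to their mempool, as in the example. */
extern void app_free_pkts(struct rte_mbuf **pkts, uint16_t count);

/* Called when the guest disables a virtqueue (e.g. on VM memory hot-plug):
 * drain every in-flight async enqueue before the ring is torn down.
 * The API does no locking, so this must not run concurrently with the
 * enqueue/poll path for the same queue. */
static int
app_vring_state_changed(int vid, uint16_t queue_id, int enable)
{
	uint16_t n_pkt;

	if (queue_id != 0 || enable)	/* only drain the Rx queue on disable */
		return 0;

	if (app_pkts_inflight == 0)
		return 0;

	struct rte_mbuf *m_cpl[app_pkts_inflight];

	n_pkt = rte_vhost_drain_queue_thread_unsafe(vid, queue_id,
					m_cpl, app_pkts_inflight);
	app_free_pkts(m_cpl, n_pkt);

	return 0;
}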