Hi Maxime, > -----Original Message----- > From: Maxime Coquelin <maxime.coque...@redhat.com> > Sent: Tuesday, July 6, 2021 10:09 PM > To: Jiang, Cheng1 <cheng1.ji...@intel.com>; Xia, Chenbo > <chenbo....@intel.com> > Cc: dev@dpdk.org; Hu, Jiayu <jiayu...@intel.com>; Yang, YvonneX > <yvonnex.y...@intel.com> > Subject: Re: [PATCH v2 1/3] vhost: add unsafe API to drain pkts in async vhost > > > > On 6/15/21 4:15 PM, Cheng Jiang wrote: > > Applications need to stop DMA transfers and finish all the in-flight > > pkts when in VM memory hot-plug case and async vhost is used. This > > patch is to provide an unsafe API to drain in-flight pkts which are > > submitted to DMA engine in vhost async data path. > > > > Signed-off-by: Cheng Jiang <cheng1.ji...@intel.com> > > --- > > lib/vhost/rte_vhost_async.h | 22 +++++++++ > > lib/vhost/version.map | 3 ++ > > lib/vhost/virtio_net.c | 90 +++++++++++++++++++++++++++---------- > > 3 files changed, 92 insertions(+), 23 deletions(-) > > > > diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h > > index 6faa31f5ad..041f40cf04 100644 > > --- a/lib/vhost/rte_vhost_async.h > > +++ b/lib/vhost/rte_vhost_async.h > > @@ -193,4 +193,26 @@ __rte_experimental uint16_t > > rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, > > struct rte_mbuf **pkts, uint16_t count); > > > > +/** > > + * This function checks async completion status and empty all pakcets > > + * for a specific vhost device queue. Packets which are inflight will > > + * be returned in an array. > > + * > > + * @note This function does not perform any locking > > + * > > + * @param vid > > + * id of vhost device to enqueue data > > + * @param queue_id > > + * queue id to enqueue data > > + * @param pkts > > + * blank array to get return packet pointer > > + * @param count > > + * size of the packet array > > + * @return > > + * num of packets returned > > + */ > > +__rte_experimental > > +uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t > queue_id, > > + struct rte_mbuf **pkts, uint16_t count); > > + > > #endif /* _RTE_VHOST_ASYNC_H_ */ > > diff --git a/lib/vhost/version.map b/lib/vhost/version.map index > > 9103a23cd4..f480f188af 100644 > > --- a/lib/vhost/version.map > > +++ b/lib/vhost/version.map > > @@ -79,4 +79,7 @@ EXPERIMENTAL { > > > > # added in 21.05 > > rte_vhost_get_negotiated_protocol_features; > > + > > + # added in 21.08 > > + rte_vhost_drain_queue_thread_unsafe; > > }; > > diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c index > > 8da8a86a10..793510974a 100644 > > --- a/lib/vhost/virtio_net.c > > +++ b/lib/vhost/virtio_net.c > > @@ -2082,36 +2082,18 @@ write_back_completed_descs_packed(struct > vhost_virtqueue *vq, > > } while (nr_left > 0); > > } > > > > -uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, > > +static __rte_always_inline uint16_t > > +vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t > > +queue_id, > > struct rte_mbuf **pkts, uint16_t count) { > > - struct virtio_net *dev = get_device(vid); > > struct vhost_virtqueue *vq; > > uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0; > > uint16_t start_idx, pkts_idx, vq_size; > > struct async_inflight_info *pkts_info; > > uint16_t from, i; > > > > - if (!dev) > > - return 0; > > - > > - VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); > > - if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { > > - VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue > idx %d.\n", > > - dev->vid, __func__, queue_id); > > - return 0; > > - } > > - > > vq = dev->virtqueue[queue_id]; > > > > - if (unlikely(!vq->async_registered)) { > > - VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for > queue id %d.\n", > > - dev->vid, __func__, queue_id); > > - return 0; > > - } > > - > > - rte_spinlock_lock(&vq->access_lock); > > - > > pkts_idx = vq->async_pkts_idx % vq->size; > > pkts_info = vq->async_pkts_info; > > vq_size = vq->size; > > @@ -2119,14 +2101,14 @@ uint16_t > rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, > > vq_size, vq->async_pkts_inflight_n); > > > > if (count > vq->async_last_pkts_n) > > - n_pkts_cpl = vq->async_ops.check_completed_copies(vid, > > + n_pkts_cpl = vq->async_ops.check_completed_copies(dev- > >vid, > > queue_id, 0, count - vq->async_last_pkts_n); > > n_pkts_cpl += vq->async_last_pkts_n; > > > > n_pkts_put = RTE_MIN(count, n_pkts_cpl); > > if (unlikely(n_pkts_put == 0)) { > > vq->async_last_pkts_n = n_pkts_cpl; > > - goto done; > > + return 0; > > } > > > > if (vq_is_packed(dev)) { > > @@ -2165,12 +2147,74 @@ uint16_t > rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, > > vq->last_async_desc_idx_split += n_descs; > > } > > > > -done: > > + return n_pkts_put; > > +} > > + > > +uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, > > + struct rte_mbuf **pkts, uint16_t count) { > > + struct virtio_net *dev = get_device(vid); > > + struct vhost_virtqueue *vq; > > + uint16_t n_pkts_put = 0; > > + > > + if (!dev) > > + return 0; > > + > > + VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); > > + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { > > + VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue > idx %d.\n", > > + dev->vid, __func__, queue_id); > > + return 0; > > + } > > + > > + vq = dev->virtqueue[queue_id]; > > + > > + if (unlikely(!vq->async_registered)) { > > + VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for > queue id %d.\n", > > + dev->vid, __func__, queue_id); > > + return 0; > > + } > > + > > + rte_spinlock_lock(&vq->access_lock); > > + > > + n_pkts_put = vhost_poll_enqueue_completed(dev, queue_id, pkts, > > +count); > > + > > rte_spinlock_unlock(&vq->access_lock); > > > > return n_pkts_put; > > } > > > > +uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t > queue_id, > > + struct rte_mbuf **pkts, uint16_t count) { > > + struct virtio_net *dev = get_device(vid); > > + struct vhost_virtqueue *vq; > > + uint16_t n_pkts = count; > > + > > + if (!dev) > > + return 0; > > + > > + VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); > > + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { > > + VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue > idx %d.\n", > > + dev->vid, __func__, queue_id); > > + return 0; > > + } > > + > > + vq = dev->virtqueue[queue_id]; > > + > > + if (unlikely(!vq->async_registered)) { > > + VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for > queue id %d.\n", > > + dev->vid, __func__, queue_id); > > + return 0; > > + } > > + > > + while (count) > > + count -= vhost_poll_enqueue_completed(dev, queue_id, pkts, > count); > > I think we may want to improve the sync_ops so that > .check_completed_copies() returns an int. If for some reason the DMA driver > callback fails, we would poll forever.
Agree, it makes sense to change the return type of .check_completed_copies() to int to report callback failure case. > > Looking more into the code, I see that ioat_check_completed_copies_cb() an > return -1 (whereas it should return an unint32_t). It would lead to undefined > behaviour if the failure would happen. The IOAT driver needs to be fixed, > and also the callback prototype and its handling. Current async design only handles .transfer_data() failure, but requires .check_completed_copies() to handle DMA copy failure. That is, .check_completed_copies() always reports all copy success to vhost, even if a DMA copy fails. And it can fall back to SW copy for the failed DMA copies. The another choice is to make vhost handle DMA copy failure. Although IOAT driver already supports to report failed copies, dmadev is under discussion. I am not sure if the interface will change after we have dmadev. So maybe it's better to leave it open until we have dmadev. How do you think? Thanks, Jiayu > > > + > > + return n_pkts; > > +} > > + > > static __rte_always_inline uint32_t > > virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id, > > struct rte_mbuf **pkts, uint32_t count, > >