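Add a batch datapath for the async vhost packed ring to improve the performance of small-packet processing. When at least PACKED_BATCH_SIZE packets remain and every packet in the batch is shorter than the async copy threshold, the batch is enqueued through virtio_dev_rx_batch_packed() and the packets are reported as completed immediately; otherwise the existing per-packet async path is used.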
Signed-off-by: Cheng Jiang <cheng1.ji...@intel.com> --- lib/vhost/virtio_net.c | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c index 5d540e5599..f60f97ec72 100644 --- a/lib/vhost/virtio_net.c +++ b/lib/vhost/virtio_net.c @@ -1724,6 +1724,29 @@ vhost_update_used_packed(struct vhost_virtqueue *vq, vq->desc_packed[head_idx].flags = head_flags; } +static __rte_always_inline int +virtio_dev_rx_async_batch_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mbuf **pkts, + struct rte_mbuf **comp_pkts, uint32_t *pkt_done) +{ + uint16_t i; + uint32_t cpy_threshold = vq->async_threshold; + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + if (unlikely(pkts[i]->pkt_len >= cpy_threshold)) + return -1; + } + if (!virtio_dev_rx_batch_packed(dev, vq, pkts)) { + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + comp_pkts[(*pkt_done)++] = pkts[i]; + + return 0; + } + + return -1; +} + static __rte_always_inline int vhost_enqueue_async_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, @@ -1872,6 +1895,7 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct rte_mbuf **comp_pkts, uint32_t *comp_count) { uint32_t pkt_idx = 0, pkt_burst_idx = 0; + uint32_t remained = count; uint16_t async_descs_idx = 0; uint16_t num_buffers; uint16_t num_descs; @@ -1889,12 +1913,19 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev, uint32_t num_async_pkts = 0, num_done_pkts = 0; struct vring_packed_desc async_descs[vq->size]; - rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); + do { + rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); + if (remained >= PACKED_BATCH_SIZE) { + if (!virtio_dev_rx_async_batch_packed(dev, vq, + &pkts[pkt_idx], comp_pkts, &num_done_pkts)) { + pkt_idx += PACKED_BATCH_SIZE; + remained -= PACKED_BATCH_SIZE; + continue; + } + } - for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { num_buffers = 0; 
num_descs = 0; - if (unlikely(virtio_dev_rx_async_single_packed(dev, vq, pkts[pkt_idx], &num_descs, &num_buffers, &async_descs[async_descs_idx], @@ -1937,6 +1968,8 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev, comp_pkts[num_done_pkts++] = pkts[pkt_idx]; } + pkt_idx++; + remained--; vq_inc_last_avail_packed(vq, num_descs); /* @@ -1961,13 +1994,12 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev, */ pkt_err = pkt_burst_idx - n_pkts; pkt_burst_idx = 0; - pkt_idx++; break; } pkt_burst_idx = 0; } - } + } while (pkt_idx < count); if (pkt_burst_idx) { n_pkts = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx); -- 2.29.2