On 9/10/21 11:05, Gaoxiang Liu wrote:
To improve the performance of vhost Tx, merge the repeated loops in eth_vhost_tx.
Move the VLAN insertion from eth_vhost_tx to virtio_dev_rx_packed
and virtio_dev_rx_split, saving one iteration over the packet burst.

Fixes: f63d356ee993 ("net/vhost: insert/strip VLAN header in software")
Cc: sta...@dpdk.org

This kind of performance optimization should not be backported to stable
branches.


Signed-off-by: Gaoxiang Liu <gaoxiangl...@163.com>
---
  drivers/net/vhost/rte_eth_vhost.c | 25 ++++---------------------
  lib/vhost/virtio_net.c            | 21 +++++++++++++++++++++
  2 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index a202931e9a..ae20550976 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -428,7 +428,6 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
  {
        struct vhost_queue *r = q;
        uint16_t i, nb_tx = 0;
-       uint16_t nb_send = 0;
        uint64_t nb_bytes = 0;
        uint64_t nb_missed = 0;
@@ -440,33 +439,17 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
        if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
                goto out;
-       for (i = 0; i < nb_bufs; i++) {
-               struct rte_mbuf *m = bufs[i];
-
-               /* Do VLAN tag insertion */
-               if (m->ol_flags & PKT_TX_VLAN_PKT) {
-                       int error = rte_vlan_insert(&m);
-                       if (unlikely(error)) {
-                               rte_pktmbuf_free(m);
-                               continue;
-                       }
-               }
-
-               bufs[nb_send] = m;
-               ++nb_send;
-       }
-
        /* Enqueue packets to guest RX queue */
-       while (nb_send) {
+       while (nb_bufs) {
                uint16_t nb_pkts;
-               uint16_t num = (uint16_t)RTE_MIN(nb_send,
+               uint16_t num = (uint16_t)RTE_MIN(nb_bufs,
                                                 VHOST_MAX_PKT_BURST);
                nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id,
                                                  &bufs[nb_tx], num);
                nb_tx += nb_pkts;
-               nb_send -= nb_pkts;
+               nb_bufs -= nb_pkts;
                if (nb_pkts < num)
                        break;
        }
@@ -474,7 +457,7 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
        for (i = 0; likely(i < nb_tx); i++)
                nb_bytes += bufs[i]->pkt_len;
-       nb_missed = nb_bufs - nb_tx;
+       nb_missed = nb_bufs;
        r->stats.pkts += nb_tx;
        r->stats.bytes += nb_bytes;
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 8549afbbe1..2057f4e7fe 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -1218,6 +1218,16 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
                uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
                uint16_t nr_vec = 0;
+               /* Do VLAN tag insertion */
+               if (pkts[pkt_idx]->ol_flags & PKT_TX_VLAN_PKT) {
+                       int error = rte_vlan_insert(&pkts[pkt_idx]);
+                       if (unlikely(error)) {
+                               rte_pktmbuf_free(pkts[pkt_idx]);
+                               pkts[pkt_idx] = NULL;
+                               continue;
+                       }
+               }
+
                if (unlikely(reserve_avail_buf_split(dev, vq,
                                                pkt_len, buf_vec, &num_buffers,
                                                avail_head, &nr_vec) < 0)) {
@@ -1490,6 +1500,17 @@ virtio_dev_rx_packed(struct virtio_net *dev,
        do {
                rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
+               /* Do VLAN tag insertion */
+               if (pkts[pkt_idx]->ol_flags & PKT_TX_VLAN_PKT) {
+                       int error = rte_vlan_insert(&pkts[pkt_idx]);
+                       if (unlikely(error)) {
+                               rte_pktmbuf_free(pkts[pkt_idx]);
+                               pkts[pkt_idx] = NULL;
+                               pkt_idx++;
+                               continue;
+                       }
+               }
+
                if (count - pkt_idx >= PACKED_BATCH_SIZE) {
                        if (!virtio_dev_rx_sync_batch_packed(dev, vq,
                                                        &pkts[pkt_idx])) {


It would make sense to do that in virtio_enqueue_offload, and it would
avoid code duplication.
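
For illustration, something like the below could work (just a rough,
untested sketch: it assumes virtio_enqueue_offload() is changed to take
a struct rte_mbuf ** and return an error code, since rte_vlan_insert()
can fail and may replace the mbuf):

static __rte_always_inline int
virtio_enqueue_offload(struct rte_mbuf **m_buf, struct virtio_net_hdr *net_hdr)
{
	struct rte_mbuf *m = *m_buf;

	/* Do VLAN tag insertion in software, as the current
	 * eth_vhost_tx() loop does. rte_vlan_insert() may reallocate
	 * the mbuf, hence the pointer-to-pointer.
	 */
	if (m->ol_flags & PKT_TX_VLAN_PKT) {
		if (unlikely(rte_vlan_insert(m_buf) != 0))
			return -1;
	}

	/* ... existing csum/TSO offload handling on *m_buf ... */

	return 0;
}

The split and packed callers would then free and skip the packet on
error, as the proposed hunks do today, without duplicating the VLAN
handling in both paths.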

Regards,
Maxime
