On 9/10/21 11:05, Gaoxiang Liu wrote:
To improve the performance of vhost Tx, merge the repeated loops in eth_vhost_tx.
Move the VLAN tag insertion from eth_vhost_tx into virtio_dev_rx_packed
and virtio_dev_rx_split, eliminating one extra loop over the packets.
Fixes: f63d356ee993 ("net/vhost: insert/strip VLAN header in software")
Cc: sta...@dpdk.org
This kind of performance optimization should not be backported to stable
branches.
Signed-off-by: Gaoxiang Liu <gaoxiangl...@163.com>
---
drivers/net/vhost/rte_eth_vhost.c | 25 ++++---------------------
lib/vhost/virtio_net.c | 21 +++++++++++++++++++++
2 files changed, 25 insertions(+), 21 deletions(-)
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index a202931e9a..ae20550976 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -428,7 +428,6 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
{
struct vhost_queue *r = q;
uint16_t i, nb_tx = 0;
- uint16_t nb_send = 0;
uint64_t nb_bytes = 0;
uint64_t nb_missed = 0;
@@ -440,33 +439,17 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
goto out;
- for (i = 0; i < nb_bufs; i++) {
- struct rte_mbuf *m = bufs[i];
-
- /* Do VLAN tag insertion */
- if (m->ol_flags & PKT_TX_VLAN_PKT) {
- int error = rte_vlan_insert(&m);
- if (unlikely(error)) {
- rte_pktmbuf_free(m);
- continue;
- }
- }
-
- bufs[nb_send] = m;
- ++nb_send;
- }
-
/* Enqueue packets to guest RX queue */
- while (nb_send) {
+ while (nb_bufs) {
uint16_t nb_pkts;
- uint16_t num = (uint16_t)RTE_MIN(nb_send,
+ uint16_t num = (uint16_t)RTE_MIN(nb_bufs,
VHOST_MAX_PKT_BURST);
nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id,
&bufs[nb_tx], num);
nb_tx += nb_pkts;
- nb_send -= nb_pkts;
+ nb_bufs -= nb_pkts;
if (nb_pkts < num)
break;
}
@@ -474,7 +457,7 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
for (i = 0; likely(i < nb_tx); i++)
nb_bytes += bufs[i]->pkt_len;
- nb_missed = nb_bufs - nb_tx;
+ nb_missed = nb_bufs;
r->stats.pkts += nb_tx;
r->stats.bytes += nb_bytes;
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 8549afbbe1..2057f4e7fe 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -1218,6 +1218,16 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
uint16_t nr_vec = 0;
+ /* Do VLAN tag insertion */
+ if (pkts[pkt_idx]->ol_flags & PKT_TX_VLAN_PKT) {
+ int error = rte_vlan_insert(&pkts[pkt_idx]);
+ if (unlikely(error)) {
+ rte_pktmbuf_free(pkts[pkt_idx]);
+ pkts[pkt_idx] = NULL;
+ continue;
+ }
+ }
+
if (unlikely(reserve_avail_buf_split(dev, vq,
pkt_len, buf_vec, &num_buffers,
avail_head, &nr_vec) < 0)) {
@@ -1490,6 +1500,17 @@ virtio_dev_rx_packed(struct virtio_net *dev,
do {
rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
+ /* Do VLAN tag insertion */
+ if (pkts[pkt_idx]->ol_flags & PKT_TX_VLAN_PKT) {
+ int error = rte_vlan_insert(&pkts[pkt_idx]);
+ if (unlikely(error)) {
+ rte_pktmbuf_free(pkts[pkt_idx]);
+ pkts[pkt_idx] = NULL;
+ pkt_idx++;
+ continue;
+ }
+ }
+
if (count - pkt_idx >= PACKED_BATCH_SIZE) {
if (!virtio_dev_rx_sync_batch_packed(dev, vq,
&pkts[pkt_idx])) {
It would make sense to do the VLAN tag insertion in virtio_enqueue_offload
instead; that would avoid duplicating the code in both the split and packed
enqueue paths.
Regards,
Maxime