Tx data ring support was removed by an earlier change that added multi-segment transmit. This change restores it.
According to the original commit (2e849373), the 64B packet rate with l2fwd improved by ~20% on an Ivy Bridge server, at which point we start to hit a bottleneck on the rx side. I also re-ran the same test on a different setup (Haswell processor, ~2.3GHz clock rate) on top of master and still observed a ~17% performance gain. Fixes: 7ba5de417e3c ("vmxnet3: support multi-segment transmit") Signed-off-by: Yong Wang <yongwang at vmware.com> --- doc/guides/rel_notes/release_2_3.rst | 5 +++++ drivers/net/vmxnet3/vmxnet3_rxtx.c | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/doc/guides/rel_notes/release_2_3.rst b/doc/guides/rel_notes/release_2_3.rst index 99de186..a23c8ac 100644 --- a/doc/guides/rel_notes/release_2_3.rst +++ b/doc/guides/rel_notes/release_2_3.rst @@ -15,6 +15,11 @@ EAL Drivers ~~~~~~~ +* **vmxnet3: restore tx data ring.** + + Tx data ring has been shown to improve small pkt forwarding performance + on vSphere environment. + Libraries ~~~~~~~~~ diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c index 4de5d89..2202d31 100644 --- a/drivers/net/vmxnet3/vmxnet3_rxtx.c +++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c @@ -348,6 +348,7 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint32_t first2fill, avail, dw2; struct rte_mbuf *txm = tx_pkts[nb_tx]; struct rte_mbuf *m_seg = txm; + int copy_size = 0; /* Is this packet execessively fragmented, then drop */ if (unlikely(txm->nb_segs > VMXNET3_MAX_TXD_PER_PKT)) { @@ -365,6 +366,14 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, break; } + if (rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) { + struct Vmxnet3_TxDataDesc *tdd; + + tdd = txq->data_ring.base + txq->cmd_ring.next2fill; + copy_size = rte_pktmbuf_pkt_len(txm); + rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size); + } + /* use the previous gen bit for the SOP desc */ dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT; first2fill = 
txq->cmd_ring.next2fill; @@ -377,7 +386,13 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, transmit buffer size (16K) is greater than maximum sizeof mbuf segment size. */ gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill; - gdesc->txd.addr = RTE_MBUF_DATA_DMA_ADDR(m_seg); + if (copy_size) + gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA + + txq->cmd_ring.next2fill * + sizeof(struct Vmxnet3_TxDataDesc)); + else + gdesc->txd.addr = RTE_MBUF_DATA_DMA_ADDR(m_seg); + gdesc->dword[2] = dw2 | m_seg->data_len; gdesc->dword[3] = 0; -- 1.9.1