The AVX-512 code path used a smaller SW ring structure containing only the mbuf pointer and none of the other fields. Those other fields are used only in the scalar code path, so update all vector driver code paths to use the smaller, faster structure.
Signed-off-by: Bruce Richardson <bruce.richard...@intel.com> --- drivers/common/intel_eth/ieth_rxtx_vec_common.h | 7 +++++++ drivers/net/ice/ice_rxtx.c | 2 +- drivers/net/ice/ice_rxtx_vec_avx2.c | 12 ++++++------ drivers/net/ice/ice_rxtx_vec_avx512.c | 14 ++------------ drivers/net/ice/ice_rxtx_vec_common.h | 6 ------ drivers/net/ice/ice_rxtx_vec_sse.c | 12 ++++++------ 6 files changed, 22 insertions(+), 31 deletions(-) diff --git a/drivers/common/intel_eth/ieth_rxtx_vec_common.h b/drivers/common/intel_eth/ieth_rxtx_vec_common.h index a4490f2dca..c8ac788f98 100644 --- a/drivers/common/intel_eth/ieth_rxtx_vec_common.h +++ b/drivers/common/intel_eth/ieth_rxtx_vec_common.h @@ -87,6 +87,13 @@ ieth_tx_backlog_entry(struct ieth_tx_entry *txep, struct rte_mbuf **tx_pkts, uin txep[i].mbuf = tx_pkts[i]; } +static __rte_always_inline void +ieth_tx_backlog_entry_vec(struct ieth_vec_tx_entry *txep, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + for (uint16_t i = 0; i < (int)nb_pkts; ++i) + txep[i].mbuf = tx_pkts[i]; +} + #define IETH_VPMD_TX_MAX_FREE_BUF 64 typedef int (*ieth_desc_done_fn)(struct ieth_tx_queue *txq, uint16_t idx); diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c index 5e58314b57..127bc604f0 100644 --- a/drivers/net/ice/ice_rxtx.c +++ b/drivers/net/ice/ice_rxtx.c @@ -825,7 +825,7 @@ ice_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id) /* record what kind of descriptor cleanup we need on teardown */ txq->vector_tx = ad->tx_vec_allowed; - txq->vector_sw_ring = ad->tx_use_avx512; + txq->vector_sw_ring = txq->vector_tx; dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED; diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c index 370871c320..7799d631f8 100644 --- a/drivers/net/ice/ice_rxtx_vec_avx2.c +++ b/drivers/net/ice/ice_rxtx_vec_avx2.c @@ -858,7 +858,7 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts, { struct ieth_tx_queue *txq = (struct 
ieth_tx_queue *)tx_queue; volatile struct ice_tx_desc *txdp; - struct ieth_tx_entry *txep; + struct ieth_vec_tx_entry *txep; uint16_t n, nb_commit, tx_id; uint64_t flags = ICE_TD_CMD; uint64_t rs = ICE_TX_DESC_CMD_RS | ICE_TD_CMD; @@ -867,7 +867,7 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts, nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh); if (txq->nb_tx_free < txq->tx_free_thresh) - ice_tx_free_bufs_vec(txq); + ieth_tx_free_bufs_vector(txq, ice_tx_desc_done, false); nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts); if (unlikely(nb_pkts == 0)) @@ -875,13 +875,13 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts, tx_id = txq->tx_tail; txdp = &txq->ice_tx_ring[tx_id]; - txep = &txq->sw_ring[tx_id]; + txep = &txq->sw_ring_v[tx_id]; txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts); n = (uint16_t)(txq->nb_tx_desc - tx_id); if (nb_commit >= n) { - ieth_tx_backlog_entry(txep, tx_pkts, n); + ieth_tx_backlog_entry_vec(txep, tx_pkts, n); ice_vtx(txdp, tx_pkts, n - 1, flags, offload); tx_pkts += (n - 1); @@ -896,10 +896,10 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts, /* avoid reach the end of ring */ txdp = &txq->ice_tx_ring[tx_id]; - txep = &txq->sw_ring[tx_id]; + txep = &txq->sw_ring_v[tx_id]; } - ieth_tx_backlog_entry(txep, tx_pkts, nb_commit); + ieth_tx_backlog_entry_vec(txep, tx_pkts, nb_commit); ice_vtx(txdp, tx_pkts, nb_commit, flags, offload); diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c b/drivers/net/ice/ice_rxtx_vec_avx512.c index c3cbd601b3..6c2c76f6fc 100644 --- a/drivers/net/ice/ice_rxtx_vec_avx512.c +++ b/drivers/net/ice/ice_rxtx_vec_avx512.c @@ -924,16 +924,6 @@ ice_vtx(volatile struct ice_tx_desc *txdp, struct rte_mbuf **pkt, } } -static __rte_always_inline void -ice_tx_backlog_entry_avx512(struct ieth_vec_tx_entry *txep, - struct rte_mbuf **tx_pkts, uint16_t nb_pkts) -{ - int i; - - for (i = 0; i < (int)nb_pkts; ++i) - txep[i].mbuf = tx_pkts[i]; 
-} - static __rte_always_inline uint16_t ice_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts, bool do_offload) @@ -964,7 +954,7 @@ ice_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts, n = (uint16_t)(txq->nb_tx_desc - tx_id); if (nb_commit >= n) { - ice_tx_backlog_entry_avx512(txep, tx_pkts, n); + ieth_tx_backlog_entry_vec(txep, tx_pkts, n); ice_vtx(txdp, tx_pkts, n - 1, flags, do_offload); tx_pkts += (n - 1); @@ -982,7 +972,7 @@ ice_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts, txep = (void *)txq->sw_ring; } - ice_tx_backlog_entry_avx512(txep, tx_pkts, nb_commit); + ieth_tx_backlog_entry_vec(txep, tx_pkts, nb_commit); ice_vtx(txdp, tx_pkts, nb_commit, flags, do_offload); diff --git a/drivers/net/ice/ice_rxtx_vec_common.h b/drivers/net/ice/ice_rxtx_vec_common.h index e1493cc28b..7ddc62e4a1 100644 --- a/drivers/net/ice/ice_rxtx_vec_common.h +++ b/drivers/net/ice/ice_rxtx_vec_common.h @@ -20,12 +20,6 @@ ice_tx_desc_done(struct ieth_tx_queue *txq, uint16_t idx) rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DESC_DONE); } -static __rte_always_inline int -ice_tx_free_bufs_vec(struct ieth_tx_queue *txq) -{ - return ieth_tx_free_bufs(txq, ice_tx_desc_done); -} - static inline void _ice_rx_queue_release_mbufs_vec(struct ice_rx_queue *rxq) { diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c b/drivers/net/ice/ice_rxtx_vec_sse.c index c89cbf2b15..0cbb84eeb0 100644 --- a/drivers/net/ice/ice_rxtx_vec_sse.c +++ b/drivers/net/ice/ice_rxtx_vec_sse.c @@ -699,7 +699,7 @@ ice_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts, { struct ieth_tx_queue *txq = (struct ieth_tx_queue *)tx_queue; volatile struct ice_tx_desc *txdp; - struct ieth_tx_entry *txep; + struct ieth_vec_tx_entry *txep; uint16_t n, nb_commit, tx_id; uint64_t flags = ICE_TD_CMD; uint64_t rs = ICE_TX_DESC_CMD_RS | ICE_TD_CMD; @@ -709,7 +709,7 @@ ice_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts, nb_pkts = 
RTE_MIN(nb_pkts, txq->tx_rs_thresh); if (txq->nb_tx_free < txq->tx_free_thresh) - ice_tx_free_bufs_vec(txq); + ieth_tx_free_bufs_vector(txq, ice_tx_desc_done, false); nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts); nb_commit = nb_pkts; @@ -718,13 +718,13 @@ ice_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts, tx_id = txq->tx_tail; txdp = &txq->ice_tx_ring[tx_id]; - txep = &txq->sw_ring[tx_id]; + txep = &txq->sw_ring_v[tx_id]; txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts); n = (uint16_t)(txq->nb_tx_desc - tx_id); if (nb_commit >= n) { - ieth_tx_backlog_entry(txep, tx_pkts, n); + ieth_tx_backlog_entry_vec(txep, tx_pkts, n); for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp) ice_vtx1(txdp, *tx_pkts, flags); @@ -738,10 +738,10 @@ ice_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts, /* avoid reach the end of ring */ txdp = &txq->ice_tx_ring[tx_id]; - txep = &txq->sw_ring[tx_id]; + txep = &txq->sw_ring_v[tx_id]; } - ieth_tx_backlog_entry(txep, tx_pkts, nb_commit); + ieth_tx_backlog_entry_vec(txep, tx_pkts, nb_commit); ice_vtx(txdp, tx_pkts, nb_commit, flags); -- 2.43.0