Hi,

> -----Original Message-----
> From: dev <dev-boun...@dpdk.org> On Behalf Of Viacheslav Ovsiienko
> Sent: Wednesday, August 7, 2019 3:58 PM
> To: dev@dpdk.org
> Cc: Yongseok Koh <ys...@mellanox.com>; Shahaf Shuler <shah...@mellanox.com>
> Subject: [dpdk-dev] [PATCH] net/mlx5: fix completion request for multi-segment packets
>
> The copying of sent mbufs pointers might be deferred to the end of
> tx_burst() routine to be copied in one call of rte_memcpy.
> For the multi segment packets this optimization is not applicable,
> because number of packets does not match with number of mbufs and
> we do not have linear array of pointers in pkts parameter.
>
> The completion request generating routine wrongly took into account
> the inconsistent (for multi-segment packets) deferred pointer copying.
>
> Fixes: 5a93e173b874 ("net/mlx5: fix Tx completion request generation")
>
> Signed-off-by: Viacheslav Ovsiienko <viachesl...@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5_rxtx.c | 23 ++++++++++++++---------
>  1 file changed, 14 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index 3e51bb0..4c01187 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -2130,6 +2130,9 @@ enum mlx5_txcmp_code {
>   *   Pointer to TX queue structure.
>   * @param loc
>   *   Pointer to burst routine local context.
> + * @param multi,
> + *   Routine is called from multi-segment sending loop,
> + *   do not correct the elts_head according to the pkts_copy.
>   * @param olx
>   *   Configured Tx offloads mask. It is fully defined at
>   *   compile time and may be used for optimization.
> @@ -2137,12 +2140,14 @@ enum mlx5_txcmp_code {
>  static __rte_always_inline void
>  mlx5_tx_request_completion(struct mlx5_txq_data *restrict txq,
>  			   struct mlx5_txq_local *restrict loc,
> +			   bool multi,
>  			   unsigned int olx)
>  {
>  	uint16_t head = txq->elts_head;
>  	unsigned int part;
>
> -	part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc->pkts_sent - loc->pkts_copy;
> +	part = (MLX5_TXOFF_CONFIG(INLINE) || multi) ?
> +	       0 : loc->pkts_sent - loc->pkts_copy;
>  	head += part;
>  	if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH ||
>  	    (MLX5_TXOFF_CONFIG(INLINE) &&
> @@ -3091,7 +3096,7 @@ enum mlx5_txcmp_code {
>  	txq->wqe_ci += (ds + 3) / 4;
>  	loc->wqe_free -= (ds + 3) / 4;
>  	/* Request CQE generation if limits are reached. */
> -	mlx5_tx_request_completion(txq, loc, olx);
> +	mlx5_tx_request_completion(txq, loc, true, olx);
>  	return MLX5_TXCMP_CODE_MULTI;
>  }
>
> @@ -3201,7 +3206,7 @@ enum mlx5_txcmp_code {
>  	txq->wqe_ci += (ds + 3) / 4;
>  	loc->wqe_free -= (ds + 3) / 4;
>  	/* Request CQE generation if limits are reached. */
> -	mlx5_tx_request_completion(txq, loc, olx);
> +	mlx5_tx_request_completion(txq, loc, true, olx);
>  	return MLX5_TXCMP_CODE_MULTI;
>  }
>
> @@ -3359,7 +3364,7 @@ enum mlx5_txcmp_code {
>  	txq->wqe_ci += (ds + 3) / 4;
>  	loc->wqe_free -= (ds + 3) / 4;
>  	/* Request CQE generation if limits are reached. */
> -	mlx5_tx_request_completion(txq, loc, olx);
> +	mlx5_tx_request_completion(txq, loc, true, olx);
>  	return MLX5_TXCMP_CODE_MULTI;
>  }
>
> @@ -3570,7 +3575,7 @@ enum mlx5_txcmp_code {
>  		++loc->pkts_sent;
>  		--pkts_n;
>  		/* Request CQE generation if limits are reached. */
> -		mlx5_tx_request_completion(txq, loc, olx);
> +		mlx5_tx_request_completion(txq, loc, false, olx);
>  		if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
>  			return MLX5_TXCMP_CODE_EXIT;
>  		loc->mbuf = *pkts++;
> @@ -3728,7 +3733,7 @@ enum mlx5_txcmp_code {
>  	txq->wqe_ci += (ds + 3) / 4;
>  	loc->wqe_free -= (ds + 3) / 4;
>  	/* Request CQE generation if limits are reached. */
> -	mlx5_tx_request_completion(txq, loc, olx);
> +	mlx5_tx_request_completion(txq, loc, false, olx);
>  }
>
>  /*
> @@ -3772,7 +3777,7 @@ enum mlx5_txcmp_code {
>  	txq->wqe_ci += (len + 3) / 4;
>  	loc->wqe_free -= (len + 3) / 4;
>  	/* Request CQE generation if limits are reached. */
> -	mlx5_tx_request_completion(txq, loc, olx);
> +	mlx5_tx_request_completion(txq, loc, false, olx);
>  }
>
>  /**
> @@ -3965,7 +3970,7 @@ enum mlx5_txcmp_code {
>  		loc->wqe_free -= (2 + part + 3) / 4;
>  		pkts_n -= part;
>  		/* Request CQE generation if limits are reached. */
> -		mlx5_tx_request_completion(txq, loc, olx);
> +		mlx5_tx_request_completion(txq, loc, false, olx);
>  		if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
>  			return MLX5_TXCMP_CODE_EXIT;
>  		loc->mbuf = *pkts++;
> @@ -4440,7 +4445,7 @@ enum mlx5_txcmp_code {
>  		++loc->pkts_sent;
>  		--pkts_n;
>  		/* Request CQE generation if limits are reached. */
> -		mlx5_tx_request_completion(txq, loc, olx);
> +		mlx5_tx_request_completion(txq, loc, false, olx);
>  		if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
>  			return MLX5_TXCMP_CODE_EXIT;
>  		loc->mbuf = *pkts++;
> --
> 1.8.3.1
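
As a quick illustration of the accounting the commit message describes: on the single-segment path the mbuf pointers may still be waiting for one deferred rte_memcpy into the elts[] ring, so the completion threshold has to be checked against elts_head plus (pkts_sent - pkts_copy); on the multi-segment (or inline) path the mbufs are already stored, so that correction must be skipped. The sketch below is only a self-contained toy model of that check, not the mlx5 driver code; the names fake_txq, fake_loc, request_completion and the threshold value are invented for the example.

/*
 * Toy model of the completion-request accounting, NOT the mlx5 driver
 * code: fake_txq, fake_loc, request_completion and TX_COMP_THRESH are
 * invented names/values used only to illustrate the idea of the fix.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TX_COMP_THRESH 32 /* stand-in for MLX5_TX_COMP_THRESH */

struct fake_txq {
	uint16_t elts_head; /* index past the last mbuf stored in elts[] */
	uint16_t elts_comp; /* head value when a completion was last requested */
};

struct fake_loc {
	unsigned int pkts_sent; /* packets sent so far in this burst */
	unsigned int pkts_copy; /* packets whose pointers are already in elts[] */
};

/* Decide whether to request a CQE, mirroring the fixed condition. */
static bool
request_completion(struct fake_txq *txq, struct fake_loc *loc,
		   bool inline_cfg, bool multi)
{
	uint16_t head = txq->elts_head;
	unsigned int part;

	/*
	 * The deferred-copy correction only makes sense when packets map
	 * 1:1 to mbufs, i.e. single-segment, non-inlined sends; on the
	 * multi-segment path elts_head already accounts for every mbuf.
	 */
	part = (inline_cfg || multi) ? 0 : loc->pkts_sent - loc->pkts_copy;
	head += part;
	if ((uint16_t)(head - txq->elts_comp) >= TX_COMP_THRESH) {
		txq->elts_comp = head;
		return true;
	}
	return false;
}

int
main(void)
{
	struct fake_txq txq_single = { .elts_head = 30, .elts_comp = 0 };
	struct fake_txq txq_multi = { .elts_head = 30, .elts_comp = 0 };
	struct fake_loc loc = { .pkts_sent = 10, .pkts_copy = 4 };

	/* Single-segment: 30 + (10 - 4) = 36 >= 32, request a completion. */
	printf("single-segment: %s\n",
	       request_completion(&txq_single, &loc, false, false) ?
	       "CQE" : "skip");
	/* Multi-segment: head stays at 30 < 32, no completion yet. */
	printf("multi-segment:  %s\n",
	       request_completion(&txq_multi, &loc, false, true) ?
	       "CQE" : "skip");
	return 0;
}

Built with any C compiler, the toy prints that the single-segment case crosses the 32-entry threshold because of the six pending copies, while the multi-segment case does not.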
Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh