Define specific function implementation for i40e driver.
Currently, mbufs recycle mode can support 128bit
vector path and
avx2
path.
And can be enabled both in fast free and no fast free
mode.
Suggested-by: Honnappa Nagarahalli
<honnappa.nagaraha...@arm.com>
Signed-off-by: Feifei Wang
<feifei.wa...@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com>
Reviewed-by: Honnappa Nagarahalli
<honnappa.nagaraha...@arm.com>
---
drivers/net/i40e/i40e_ethdev.c | 1 +
drivers/net/i40e/i40e_ethdev.h | 2 +
.../net/i40e/i40e_recycle_mbufs_vec_common.c |
147
++++++++++++++++++
drivers/net/i40e/i40e_rxtx.c | 32 ++++
drivers/net/i40e/i40e_rxtx.h | 4 +
drivers/net/i40e/meson.build | 1 +
6 files changed, 187 insertions(+) create mode
100644
drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
diff --git a/drivers/net/i40e/i40e_ethdev.c
b/drivers/net/i40e/i40e_ethdev.c index
8271bbb394..50ba9aac94
100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -496,6 +496,7 @@ static const struct
eth_dev_ops i40e_eth_dev_ops
= {
.flow_ops_get = i40e_dev_flow_ops_get,
.rxq_info_get = i40e_rxq_info_get,
.txq_info_get = i40e_txq_info_get,
+ .recycle_rxq_info_get =
i40e_recycle_rxq_info_get,
.rx_burst_mode_get =
i40e_rx_burst_mode_get,
.tx_burst_mode_get =
i40e_tx_burst_mode_get,
.timesync_enable = i40e_timesync_enable,
diff --git a/drivers/net/i40e/i40e_ethdev.h
b/drivers/net/i40e/i40e_ethdev.h index
6f65d5e0ac..af758798e1
100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -1355,6 +1355,8 @@ void
i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t
queue_id,
struct rte_eth_rxq_info *qinfo); void
i40e_txq_info_get(struct rte_eth_dev *dev,
uint16_t
queue_id,
struct rte_eth_txq_info *qinfo);
+void i40e_recycle_rxq_info_get(struct
+rte_eth_dev *dev, uint16_t
queue_id,
+ struct rte_eth_recycle_rxq_info
+*recycle_rxq_info);
int i40e_rx_burst_mode_get(struct rte_eth_dev
*dev, uint16_t
queue_id,
struct rte_eth_burst_mode *mode);
int
i40e_tx_burst_mode_get(struct rte_eth_dev *dev,
uint16_t queue_id, diff -- git
a/drivers/net/i40e/i40e_recycle_mbufs_vec_common
.c
b/drivers/net/i40e/i40e_recycle_mbufs_vec_common
.c new file mode 100644 index
0000000000..5663ecccde
--- /dev/null
+++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_co
+++ mmon
+++ .c
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Arm Limited.
+ */
+
+#include <stdint.h> #include <ethdev_driver.h>
+
+#include "base/i40e_prototype.h"
+#include "base/i40e_type.h"
+#include "i40e_ethdev.h"
+#include "i40e_rxtx.h"
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+void
+i40e_recycle_rx_descriptors_refill_vec(void
+*rx_queue, uint16_t
+nb_mbufs) {
+ struct i40e_rx_queue *rxq = rx_queue;
+ struct i40e_rx_entry *rxep;
+ volatile union i40e_rx_desc *rxdp;
+ uint16_t rx_id;
+ uint64_t paddr;
+ uint64_t dma_addr;
+ uint16_t i;
+
+ rxdp = rxq->rx_ring + rxq->rxrearm_start;
+ rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+ for (i = 0; i < nb_mbufs; i++) {
+ /* Initialize rxdp descs. */
+ paddr = (rxep[i].mbuf)->buf_iova +
RTE_PKTMBUF_HEADROOM;
+ dma_addr = rte_cpu_to_le_64(paddr);
+ /* flush desc with pa dma_addr */
+ rxdp[i].read.hdr_addr = 0;
+ rxdp[i].read.pkt_addr = dma_addr;
+ }
+
+ /* Update the descriptor initializer index */
+ rxq->rxrearm_start += nb_mbufs;
+ rx_id = rxq->rxrearm_start - 1;
+
+ if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
+ rxq->rxrearm_start = 0;
+ rx_id = rxq->nb_rx_desc - 1;
+ }
+
+ rxq->rxrearm_nb -= nb_mbufs;
+
+ rte_io_wmb();
+ /* Update the tail pointer on the NIC */
+ I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail,
+rx_id); }
+
+uint16_t
+i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
+ struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
+ struct i40e_tx_queue *txq = tx_queue;
+ struct i40e_tx_entry *txep;
+ struct rte_mbuf **rxep;
+ int i, n;
+ uint16_t nb_recycle_mbufs;
+ uint16_t avail = 0;
+ uint16_t mbuf_ring_size = recycle_rxq_info-
mbuf_ring_size;
+ uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
+ uint16_t refill_requirement =
+recycle_rxq_info-
refill_requirement;
+ uint16_t refill_head = *recycle_rxq_info->refill_head;
+ uint16_t receive_tail =
+*recycle_rxq_info->receive_tail;
+
+ /* Get available recycling Rx buffers. */
+ avail = (mbuf_ring_size - (refill_head -
+receive_tail)) & mask;
+
+ /* Check Tx free thresh and Rx available space. */
+ if (txq->nb_tx_free > txq->tx_free_thresh ||
+avail <=
+txq-
tx_rs_thresh)
+ return 0;
+
+ /* check DD bits on threshold descriptor */
+ if
+((txq->tx_ring[txq->tx_next_dd].cmd_type_offset
+_bsz
+&
+
rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+
rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
+ return 0;
+
+ n = txq->tx_rs_thresh;
+ nb_recycle_mbufs = n;
+
+ /* Mbufs recycle mode can only support no ring
+buffer
wrapping
around.
+ * Two case for this:
+ *
+ * case 1: The refill head of Rx buffer ring
+needs to be aligned
with
+ * mbuf ring size. In this case, the number of
+Tx
freeing buffers
+ * should be equal to refill_requirement.
+ *
+ * case 2: The refill head of Rx ring buffer
+does not need to be
aligned
+ * with mbuf ring size. In this case, the
+update of refill head
can not
+ * exceed the Rx mbuf ring size.
+ */
+ if (refill_requirement != n ||
+ (!refill_requirement && (refill_head + n >
mbuf_ring_size)))
+ return 0;
+
+ /* First buffer to free from S/W ring is at index
+ * tx_next_dd - (tx_rs_thresh-1).
+ */
+ txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+ rxep = recycle_rxq_info->mbuf_ring;
+ rxep += refill_head;
+
+ if (txq->offloads &
RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+ /* Avoid txq contains buffers from unexpected
mempool. */
+ if (unlikely(recycle_rxq_info->mp
+ != txep[0].mbuf-
pool))
+ return 0;
+
+ /* Directly put mbufs from Tx to Rx. */
+ for (i = 0; i < n; i++)
+ rxep[i] = txep[i].mbuf;
+ } else {
+ for (i = 0; i < n; i++) {
+ rxep[i] =
rte_pktmbuf_prefree_seg(txep[i].mbuf);
+
+ /* If Tx buffers are not the last
reference or
from
+ * unexpected mempool, previous
copied
buffers are
+ * considered as invalid.
+ */
+ if (unlikely((rxep[i] == NULL &&
refill_requirement) ||
[Konstantin]
Could you pls remind me why it is ok to have
rxep[i]==NULL when refill_requirement is not set?
If reill_requirement is not zero, it means each tx
freed buffer must be valid and can be put into Rx sw_ring.
Then the refill head of Rx buffer ring can be
aligned with mbuf ring size. Briefly speaking the
number
of Tx valid freed buffer must be equal to Rx
refill_requirement.
For example, i40e driver.
If reill_requirement is zero, it means that the
refill head of Rx buffer ring does not need to be
aligned with mbuf ring size, thus if Tx have n
valid freed buffers, we just need to put these n
buffers into Rx
sw-
ring, and not to be equal to the Rx setting rearm number.
For example, mlx5 driver.
In conclusion, above difference is due to pmd
drivers have different
strategies to update their Rx rearm(refill) head.
For i40e driver, if rearm_head exceed 1024, it
will be set as
0 due to the
number of each rearm is a fixed value by default.
For mlx5 driver. Its rearm_head can exceed 1024,
and use mask to achieve
real index. Thus its rearm number can be a different value.
Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf), it
means that this mbuf is not free yet and can't be reused.
Shouldn't we then set nb_recycle_mbufs = 0 in that case
too?
Or probably would be enough to skip that mbuf?
Might be something like that:
for (i = 0, j = 0; i < n; i++) {
rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
if (rxep[j] == NULL || recycle_rxq_info->mp !=
rxep[j].mbuf-
pool)) {
if (refill_requirement) {
nb_recycle_mbufs = 0;
break;
}
} else
j++;
}
/* now j contains actual number of recycled mbufs */
?
+ recycle_rxq_info-
mp !=
txep[i].mbuf-
pool))
+ nb_recycle_mbufs = 0;
+ }
+ /* If Tx buffers are not the last reference or
+ * from unexpected mempool, all recycled
buffers
+ * are put into mempool.
+ */
+ if (nb_recycle_mbufs == 0)
+ for (i = 0; i < n; i++) {
+ if (rxep[i] != NULL)
+
rte_mempool_put(rxep[i]-
pool,
rxep[i]);
+ }
+ }
+
[Konstantin] After another thought, it might be easier
and cleaner
just to:
if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf-
pool)
nb_recycle_mbufs = 0;
Anyway, from my understanding - if
rte_pktmbuf_prefree_seg(mbuf) returns NULL, then we
can't recycle
that mbuf.
[Feifei] Agree with that
'rte_pktmbuf_prefree_seg(mbuf) returns NULL, then
we can't recycle that mbuf'.
Firstly, we should know for i40e driver, the number
of free mbufs is fixed, it
must equal to 'tx_rs_thresh'
This means if we start to free Tx mbufs, it cannot be
interrupted until the
given amount of mbufs are freed.
In the meanwhile, doing prefree operation for a Tx
mbuf can be looked at this mbuf is freed from this TX
sw-ring if the API returns NULL. This is due
to that call 'prefree_seg' function means update the mbuf
refcnt.
So let's come back our recycle mbuf case.
Here we consider that the case if we have 32 mbufs
need to be freed, and
we firstly do the pre-free.
And then first 8 mbufs is good and return value is not none.
But the 9th
mbuf is bad, its refcnt is more than 1.
So we just update its refcnt and cannot put it into Rx sw-ring.
For Tx sw-ring,
this mbuf has been freed.
Then we should continue to do pre-free operation for
the next Tx mbufs to ensure the given amount of mbufs are
freed.
Do a conclusion for this, this is because if we start
to do pre-free operation, the Tx mbuf refcnt value
maybe changed, so we cannot stop or
break until finish all the pre-free operation.
Finally, in the case that Rx refill_request is not
zero, but the valid mbuf amount is less than this
value, we must put back this Tx mbufs into
mempool.
Above is the reason why I do not directly jump out of
the loop if some mbuf
return value is NULL.
Yep, I already realized that it is a bit more
complicated and we need to continue with prefree() for
all packets even when we get NULL in
the middle.
Anyway the code has to be changed, right?
Sorry, I think for the code, it is unnecessary to be changed.
For no fast free path, currently:
1. We check whether each mbuf is Ok and call pre_free function
----------------------------------------------------------
----
--
--
----
----------------------------------------------------------
----
2.1 For the mbuf return value is not NULL, it is put into Rx sw-
ring.
2.2 For the mbuf return value is zero and refill-request,
it will also firstly put into Rx sw-ring, and we set
nb_recycle =
0
----------------------------------------------------------
----
--
--
----
----------------------------------------------------------
----
3.1 We check nb_recycle, if it is not 0, we will continue
to rearm Rx descs
and update its index by call descs_refill function.
3.2 if nb_recycle is 0, we will put valid recycle mbufs
back into mempool as general path. This is necessary,
because we need to ensure the freed Tx number is
fixed.(Some buffers return is null can be seen as freed,
others need to be put into mempool)
Or maybe I ignore something?
I am talking about the case when both refill_requirement and
mbuf return values iare zero:
if (unlikely((rxep[i] == NULL && refill_requirement) || // ???
rxep[i]
==
0
AND refill_requirement == 0 ???
recycle_rxq_info->mp != txep[i].mbuf->pool))
nb_recycle_mbufs = 0;
As I can read the code you treat such situation as valid,
while I think we should reset nb_recycle_mbufs to zero when
rxep[i] == NULL, no matter what value refill_requirement is.
So this means for maybe MLX5 driver, its refill_request = 0.
And if some mbufs return value is zero, the other mbufs can
not be recycled into Rx sw-ring? Because nb_recycle=0, and
they need to be put into
mempool.
I think for such as MLX5 driver, we can allow recycle some
valid mbufs into
Rx ring.
Because no constraint for its refill number. Is my suggestion
reasonable?
I suppose yes: if refill_request is zero we potentially can skip 'bad'
mbufs and continue with recycling for remaining ones.
It would probably require more changes in current code, but
sounds ok to me in general.
That's Ok. Thanks for your careful reviewing.
I'm very sorry not to receive your e-mail and until now I realize we
need to do some code change for i40e driver. Also thanks ferruh to kindly
remind this.
No worries at all and thanks for you and Ferruh fro fast reaction.
Agree with you we need some operation for the case that
(refill_requirement == 0 && rxep[i] == 0).
Thus maybe we can do a change as follows:
for (i = 0; i < n; i++) {
rxep[0] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
if (unlikely((rxep[0] == NULL && refill_requirement) ||
recycle_rxq_info->mp != txep[i].mbuf->pool))
nb_recycle_mbufs = 0;
if (likely(rxep[0]))
rxep++;
}
Is above change is Ok?
Just do a change for the above code:
----------------------------------------------------------------------
-------------------------------
int j = 0;
......
for (i = 0; i < n; i++) {
rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
if (unlikely(rxep[j] == NULL && !refill_requirement))
continue;
if (unlikely((rxep[j] == NULL && refill_requirement) ||
recycle_rxq_info->mp != txep[i].mbuf->pool))
nb_recycle_mbufs = 0;
j++;
}
if (nb_recycle_mbufs == 0)
for (i = 0; i < j; i++) {
if (rxep[i] != NULL)
rte_mempool_put(rxep[i]->pool, rxep[i]);
}
I think it should work...
One more thing, shouldn't we reset
nb_recycle_mbufs = j;
Something like:
if (nb_recycle_mbufs == 0) {
for (i = 0; i < j; i++) {
if (rxep[i] != NULL)
rte_mempool_put(rxep[i]->pool, rxep[i]);
}
} else
nb_recycle_mbufs = j;
?
Yes, we should reset it.
I just do a performance test with the new code and find the performance
decrease with 8%, comparing
with the old version.
Thus I think we should come back your previous idea:
"we should reset nb_recycle_mbufs to zero when rxep[i] == NULL,
no matter what value refill_requirement is":
for (i = 0; i < n; i++) {
rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
if (unlikely((rxep[i] == NULL) ||
recycle_rxq_info->mp != txep[i].mbuf->pool))
nb_recycle_mbufs = 0;
}
Thus we drop the case that putting the remaining valid mbuf into Rx sw-ring
when no refill_request, but maintain good performance.
I am ok with that way too.
Again that way, it looks simpler and probaly less error-prone.
Thanks
Konstantin