>
> Thanks for the comments.
>
> From: Константин Ананьев <konstantin.v.anan...@yandex.ru>
> Sent: Monday, June 5, 2023 8:54 PM
> To: Feifei Wang <feifei.wa...@arm.com>; tho...@monjalon.net;
>     Ferruh Yigit <ferruh.yi...@amd.com>;
>     Andrew Rybchenko <andrew.rybche...@oktetlabs.ru>
> Cc: dev@dpdk.org; nd <n...@arm.com>; Honnappa Nagarahalli
>     <honnappa.nagaraha...@arm.com>; Ruifeng Wang <ruifeng.w...@arm.com>
> Subject: Re: [PATCH v6 1/4] ethdev: add API for mbufs recycle mode
>
> Hi Feifei,
>
> A few more comments from me, see below.
>
> Add 'rte_eth_recycle_rx_queue_info_get' and 'rte_eth_recycle_mbufs'
> APIs to recycle used mbufs from a transmit queue of an Ethernet device,
> and move these mbufs into a mbuf ring for a receive queue of an Ethernet
> device. This can bypass mempool 'put/get' operations, hence saving CPU
> cycles.
>
> For each recycled mbuf, the rte_eth_recycle_mbufs() function performs
> the following operations:
> - Copy used *rte_mbuf* buffer pointers from the Tx mbuf ring into the Rx
>   mbuf ring.
> - Replenish the Rx descriptors with the recycled *rte_mbuf* mbufs freed
>   from the Tx mbuf ring.
>
> Suggested-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
> Suggested-by: Ruifeng Wang <ruifeng.w...@arm.com>
> Signed-off-by: Feifei Wang <feifei.wa...@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
> ---
>  doc/guides/rel_notes/release_23_07.rst |   7 +
>  lib/ethdev/ethdev_driver.h             |  10 ++
>  lib/ethdev/ethdev_private.c            |   2 +
>  lib/ethdev/rte_ethdev.c                |  31 +++++
>  lib/ethdev/rte_ethdev.h                | 182 +++++++++++++++++++++++++
>  lib/ethdev/rte_ethdev_core.h           |  15 +-
>  lib/ethdev/version.map                 |   4 +
>  7 files changed, 249 insertions(+), 2 deletions(-)
>
> diff --git a/doc/guides/rel_notes/release_23_07.rst b/doc/guides/rel_notes/release_23_07.rst
> index a9b1293689..f279036cb9 100644
> --- a/doc/guides/rel_notes/release_23_07.rst
> +++ b/doc/guides/rel_notes/release_23_07.rst
> @@ -55,6 +55,13 @@ New Features
>     Also, make sure to start the actual text at the margin.
>     =======================================================
>
> +* **Added mbufs recycling support.**
> +  Added ``rte_eth_recycle_rx_queue_info_get`` and ``rte_eth_recycle_mbufs``
> +  APIs which allow the user to copy used mbufs from the Tx mbuf ring
> +  into the Rx mbuf ring. This feature supports the case that the Rx Ethernet
> +  device is different from the Tx Ethernet device, with respective driver
> +  callback functions in ``rte_eth_recycle_mbufs``.
> +
>
>  Removed Items
>  -------------
> diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
> index 2c9d615fb5..c6723d5277 100644
> --- a/lib/ethdev/ethdev_driver.h
> +++ b/lib/ethdev/ethdev_driver.h
> @@ -59,6 +59,10 @@ struct rte_eth_dev {
>  	eth_rx_descriptor_status_t rx_descriptor_status;
>  	/** Check the status of a Tx descriptor */
>  	eth_tx_descriptor_status_t tx_descriptor_status;
> +	/** Pointer to PMD transmit mbufs reuse function */
> +	eth_recycle_tx_mbufs_reuse_t recycle_tx_mbufs_reuse;
> +	/** Pointer to PMD receive descriptors refill function */
> +	eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
>
>  	/**
>  	 * Device data that is shared between primary and secondary processes
> @@ -504,6 +508,10 @@ typedef void (*eth_rxq_info_get_t)(struct rte_eth_dev *dev,
>  typedef void (*eth_txq_info_get_t)(struct rte_eth_dev *dev,
>  	uint16_t tx_queue_id, struct rte_eth_txq_info *qinfo);
>
> +typedef void (*eth_recycle_rxq_info_get_t)(struct rte_eth_dev *dev,
> +	uint16_t rx_queue_id,
> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> +
>  typedef int (*eth_burst_mode_get_t)(struct rte_eth_dev *dev,
>  	uint16_t queue_id, struct rte_eth_burst_mode *mode);
>
> @@ -1247,6 +1255,8 @@ struct eth_dev_ops {
>  	eth_rxq_info_get_t         rxq_info_get;
>  	/** Retrieve Tx queue information */
>  	eth_txq_info_get_t         txq_info_get;
> +	/** Retrieve mbufs recycle Rx queue information */
> +	eth_recycle_rxq_info_get_t recycle_rxq_info_get;
>  	eth_burst_mode_get_t       rx_burst_mode_get; /**< Get Rx burst mode */
>  	eth_burst_mode_get_t       tx_burst_mode_get; /**< Get Tx burst mode */
>  	eth_fw_version_get_t       fw_version_get;    /**< Get firmware version */
> diff --git a/lib/ethdev/ethdev_private.c b/lib/ethdev/ethdev_private.c
> index 14ec8c6ccf..f8ab64f195 100644
> --- a/lib/ethdev/ethdev_private.c
> +++ b/lib/ethdev/ethdev_private.c
> @@ -277,6 +277,8 @@ eth_dev_fp_ops_setup(struct rte_eth_fp_ops *fpo,
>  	fpo->rx_queue_count = dev->rx_queue_count;
>  	fpo->rx_descriptor_status = dev->rx_descriptor_status;
>  	fpo->tx_descriptor_status = dev->tx_descriptor_status;
> +	fpo->recycle_tx_mbufs_reuse = dev->recycle_tx_mbufs_reuse;
> +	fpo->recycle_rx_descriptors_refill = dev->recycle_rx_descriptors_refill;
>
>  	fpo->rxq.data = dev->data->rx_queues;
>  	fpo->rxq.clbk = (void **)(uintptr_t)dev->post_rx_burst_cbs;
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index 4d03255683..7c27dcfea4 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -5784,6 +5784,37 @@ rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
>  	return 0;
>  }
>
> +int
> +rte_eth_recycle_rx_queue_info_get(uint16_t port_id, uint16_t queue_id,
> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
> +{
> +	struct rte_eth_dev *dev;
> +
> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
> +	dev = &rte_eth_devices[port_id];
> +
> +	if (queue_id >= dev->data->nb_rx_queues) {
> +		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u\n", queue_id);
> +		return -EINVAL;
> +	}
> +
> +	if (dev->data->rx_queues == NULL ||
> +			dev->data->rx_queues[queue_id] == NULL) {
> +		RTE_ETHDEV_LOG(ERR,
> +			"Rx queue %"PRIu16" of device with port_id=%"
> +			PRIu16" has not been setup\n",
> +			queue_id, port_id);
> +		return -EINVAL;
> +	}
> +
> +	if (*dev->dev_ops->recycle_rxq_info_get == NULL)
> +		return -ENOTSUP;
> +
> +	dev->dev_ops->recycle_rxq_info_get(dev, queue_id, recycle_rxq_info);
> +
> +	return 0;
> +}
> +
>  int
>  rte_eth_rx_burst_mode_get(uint16_t port_id, uint16_t queue_id,
>  	struct rte_eth_burst_mode *mode)
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index 99fe9e238b..7434aa2483 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -1820,6 +1820,30 @@ struct rte_eth_txq_info {
>  	uint8_t queue_state; /**< one of RTE_ETH_QUEUE_STATE_*. */
>  } __rte_cache_min_aligned;
>
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this structure may change without prior notice.
> + *
> + * Ethernet device Rx queue information structure for recycling mbufs.
> + * Used to retrieve Rx queue information when the Tx queue is reusing mbufs
> + * and moving them into the Rx mbuf ring.
> + */
> +struct rte_eth_recycle_rxq_info {
> +	struct rte_mbuf **mbuf_ring; /**< mbuf ring of Rx queue. */
> +	struct rte_mempool *mp;      /**< mempool of Rx queue. */
> +	uint16_t *refill_head;       /**< head of Rx queue refilling mbufs. */
> +	uint16_t *receive_tail;      /**< tail of Rx queue receiving pkts. */
> +	uint16_t mbuf_ring_size;     /**< configured size of the mbuf ring. */
> +	/**
> +	 * Requirement on mbuf refilling batch size of Rx mbuf ring.
> +	 * For some PMD drivers, the number of Rx mbuf ring refilling mbufs
> +	 * should be aligned with mbuf ring size, in order to simplify
> +	 * ring wrapping around.
> +	 * Value 0 means that PMD drivers have no requirement for this.
> +	 */
> +	uint16_t refill_requirement;
> +} __rte_cache_min_aligned;
> +
>  /* Generic Burst mode flag definition, values can be ORed. */
>
>  /**
> @@ -4809,6 +4833,31 @@ int rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id,
>  int rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
>  	struct rte_eth_txq_info *qinfo);
>
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * Retrieve information about a given port's Rx queue for recycling mbufs.
> + *
> + * @param port_id
> + *   The port identifier of the Ethernet device.
> + * @param queue_id
> + *   The Rx queue on the Ethernet device for which information
> + *   will be retrieved.
> + * @param recycle_rxq_info
> + *   A pointer to a structure of type *rte_eth_recycle_rxq_info* to be filled.
> + *
> + * @return
> + *   - 0: Success
> + *   - -ENODEV: If *port_id* is invalid.
> + *   - -ENOTSUP: routine is not supported by the device PMD.
> + *   - -EINVAL: The queue_id is out of range.
> + */
> +__rte_experimental
> +int rte_eth_recycle_rx_queue_info_get(uint16_t port_id,
> +	uint16_t queue_id,
> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> +
>  /**
>   * Retrieve information about the Rx packet burst mode.
>   *
> @@ -6483,6 +6532,139 @@ rte_eth_tx_buffer(uint16_t port_id, uint16_t queue_id,
>  	return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
>  }
>
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * Recycle used mbufs from a transmit queue of an Ethernet device, and move
> + * these mbufs into a mbuf ring for a receive queue of an Ethernet device.
> + * This can bypass the mempool path to save CPU cycles.
> + *
> + * The rte_eth_recycle_mbufs() function loops, with the rte_eth_rx_burst() and
> + * rte_eth_tx_burst() functions, freeing used Tx mbufs and replenishing Rx
> + * descriptors. The number of recycled mbufs depends on the request of the Rx
> + * mbuf ring, with the constraint of enough used mbufs in the Tx mbuf ring.
> + *
> + * For each recycled mbuf, the rte_eth_recycle_mbufs() function performs the
> + * following operations:
> + *
> + * - Copy used *rte_mbuf* buffer pointers from the Tx mbuf ring into the Rx mbuf ring.
> + *
> + * - Replenish the Rx descriptors with the recycled *rte_mbuf* mbufs freed
> + *   from the Tx mbuf ring.
> + *
> + * This function splits the Rx and Tx paths with different callback functions: the
> + * callback function recycle_tx_mbufs_reuse is for the Tx driver, and the callback
> + * function recycle_rx_descriptors_refill is for the Rx driver. rte_eth_recycle_mbufs()
> + * can support the case that the Rx Ethernet device is different from the Tx Ethernet device.
> + *
> + * It is the responsibility of users to select the Rx/Tx queue pair to recycle
> + * mbufs. Before calling this function, users must call the rte_eth_recycle_rxq_info_get
> + * function to retrieve the selected Rx queue information.
> + * @see rte_eth_recycle_rxq_info_get, struct rte_eth_recycle_rxq_info
> + *
> + * Currently, the rte_eth_recycle_mbufs() function can only support one-time pairing
> + * between the receive queue and the transmit queue. Do not pair one receive queue with
> + * multiple transmit queues or pair one transmit queue with multiple receive queues,
> + * in order to avoid memory overwrite errors.
> Probably I am missing something, but why is it not possible to do something
> like this:
>
> rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N, tx_queue_id=M, ...);
> ....
> rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N, tx_queue_id=K, ...);
>
> I.e. feed one Rx queue from 2 Tx queues?
>
> Two problems with this:
> 1. If we have 2 Tx queues for one Rx queue, the thread would have to make an extra
> judgement to decide which one to choose in the driver layer.
Not sure why it would be in the driver layer?
In the example I gave above, the decision is made at the application layer.
Let's say the first call didn't free enough mbufs, so the app decided to use the second txq for rearm.

> On the other hand, the current mechanism lets users switch from one txq to another in a
> timely manner in the application layer. If the user wants to choose another txq, they just
> need to change the tx_queue_id parameter in the API.
> 2. If you want one rxq to support two txqs at the same time, this needs a spinlock on a
> guard variable to avoid multi-thread conflict. A spinlock would decrease the data-path
> performance greatly. Thus, we do not consider 1 rxq mapping multiple txqs here.

I am talking about the situation when one thread controls 2 tx queues.

> + *
> + * @param rx_port_id
> + *   Port identifying the receive side.
> + * @param rx_queue_id
> + *   The index of the receive queue identifying the receive side.
> + *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
> + *   to rte_eth_dev_configure().
> + * @param tx_port_id
> + *   Port identifying the transmit side.
> + * @param tx_queue_id
> + *   The index of the transmit queue identifying the transmit side.
> + *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
> + *   to rte_eth_dev_configure().
> + * @param recycle_rxq_info
> + *   A pointer to a structure of type *rte_eth_recycle_rxq_info* which contains
> + *   the information of the Rx queue mbuf ring.
> + * @return
> + *   The number of recycled mbufs.
> + */
> +__rte_experimental
> +static inline uint16_t
> +rte_eth_recycle_mbufs(uint16_t rx_port_id, uint16_t rx_queue_id,
> +		uint16_t tx_port_id, uint16_t tx_queue_id,
> +		struct rte_eth_recycle_rxq_info *recycle_rxq_info)
> +{
> +	struct rte_eth_fp_ops *p;
> +	void *qd;
> +	uint16_t nb_mbufs;
> +
> +#ifdef RTE_ETHDEV_DEBUG_TX
> +	if (tx_port_id >= RTE_MAX_ETHPORTS ||
> +			tx_queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> +		RTE_ETHDEV_LOG(ERR,
> +			"Invalid tx_port_id=%u or tx_queue_id=%u\n",
> +			tx_port_id, tx_queue_id);
> +		return 0;
> +	}
> +#endif
> +
> +	/* fetch pointer to Tx queue data */
> +	p = &rte_eth_fp_ops[tx_port_id];
> +	qd = p->txq.data[tx_queue_id];
> +
> +#ifdef RTE_ETHDEV_DEBUG_TX
> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(tx_port_id, 0);
> +
> +	if (qd == NULL) {
> +		RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u for port_id=%u\n",
> +			tx_queue_id, tx_port_id);
> +		return 0;
> +	}
> +#endif
> +	if (p->recycle_tx_mbufs_reuse == NULL)
> +		return 0;
> +
> +	/* Copy used *rte_mbuf* buffer pointers from the Tx mbuf ring
> +	 * into the Rx mbuf ring.
> +	 */
> +	nb_mbufs = p->recycle_tx_mbufs_reuse(qd, recycle_rxq_info);
> +
> +	/* If there are no mbufs to recycle, return 0. */
> +	if (nb_mbufs == 0)
> +		return 0;
> +
> +#ifdef RTE_ETHDEV_DEBUG_RX
> +	if (rx_port_id >= RTE_MAX_ETHPORTS ||
> +			rx_queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> +		RTE_ETHDEV_LOG(ERR, "Invalid rx_port_id=%u or rx_queue_id=%u\n",
> +			rx_port_id, rx_queue_id);
> +		return 0;
> +	}
> +#endif
> +
> +	/* fetch pointer to Rx queue data */
> +	p = &rte_eth_fp_ops[rx_port_id];
> +	qd = p->rxq.data[rx_queue_id];
> +
> +#ifdef RTE_ETHDEV_DEBUG_RX
> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(rx_port_id, 0);
> +
> +	if (qd == NULL) {
> +		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for port_id=%u\n",
> +			rx_queue_id, rx_port_id);
> +		return 0;
> +	}
> +#endif
> +
> +	if (p->recycle_rx_descriptors_refill == NULL)
> +		return 0;
> +
> +	/* Replenish the Rx descriptors with the recycled mbufs
> +	 * copied into the Rx mbuf ring.
> +	 */
> +	p->recycle_rx_descriptors_refill(qd, nb_mbufs);
> +
> +	return nb_mbufs;
> +}
> +
>  /**
>   * @warning
>   * @b EXPERIMENTAL: this API may change without prior notice
> diff --git a/lib/ethdev/rte_ethdev_core.h b/lib/ethdev/rte_ethdev_core.h
> index dcf8adab92..a2e6ea6b6c 100644
> --- a/lib/ethdev/rte_ethdev_core.h
> +++ b/lib/ethdev/rte_ethdev_core.h
> @@ -56,6 +56,13 @@ typedef int (*eth_rx_descriptor_status_t)(void *rxq, uint16_t offset);
>  /** @internal Check the status of a Tx descriptor */
>  typedef int (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset);
>
> +/** @internal Copy used mbufs from the Tx mbuf ring into the Rx mbuf ring */
> +typedef uint16_t (*eth_recycle_tx_mbufs_reuse_t)(void *txq,
> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> +
> +/** @internal Refill Rx descriptors with the recycled mbufs */
> +typedef void (*eth_recycle_rx_descriptors_refill_t)(void *rxq, uint16_t nb);
> +
>  /**
>   * @internal
>   * Structure used to hold opaque pointers to internal ethdev Rx/Tx
> @@ -90,9 +97,11 @@ struct rte_eth_fp_ops {
>  	eth_rx_queue_count_t rx_queue_count;
>  	/** Check the status of a Rx descriptor. */
>  	eth_rx_descriptor_status_t rx_descriptor_status;
> +	/** Refill Rx descriptors with the recycled mbufs. */
> +	eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
> I am afraid we can't put new fields here without ABI breakage.
>
> Agree
>
> It has to be below rxq.
> Now thinking about it, the current layout is probably not the best one,
> and when introducing this struct I should probably have put rxq either
> at the top of the struct, or on the next cache line.
> But such a change is not possible right now anyway.
> Same story for txq.
>
> Thus we should rearrange the structure like below:
> struct rte_eth_fp_ops {
>     struct rte_ethdev_qdata rxq;
>     eth_rx_burst_t rx_pkt_burst;
>     eth_rx_queue_count_t rx_queue_count;
>     eth_rx_descriptor_status_t rx_descriptor_status;
>     eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
>     uintptr_t reserved1[2];
> }

Yes, I think such a layout will be better.
The only problem here - we have to wait for 23.11 for that.

>
>  	/** Rx queues data. */
>  	struct rte_ethdev_qdata rxq;
> -	uintptr_t reserved1[3];
> +	uintptr_t reserved1[2];
>  	/**@}*/
>
>  	/**@{*/
> @@ -106,9 +115,11 @@ struct rte_eth_fp_ops {
>  	eth_tx_prep_t tx_pkt_prepare;
>  	/** Check the status of a Tx descriptor. */
>  	eth_tx_descriptor_status_t tx_descriptor_status;
> +	/** Copy used mbufs from the Tx mbuf ring into the Rx mbuf ring. */
> +	eth_recycle_tx_mbufs_reuse_t recycle_tx_mbufs_reuse;
>  	/** Tx queues data. */
>  	struct rte_ethdev_qdata txq;
> -	uintptr_t reserved2[3];
> +	uintptr_t reserved2[2];
>  	/**@}*/
>
>  } __rte_cache_aligned;
> diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
> index 357d1a88c0..45c417f6bd 100644
> --- a/lib/ethdev/version.map
> +++ b/lib/ethdev/version.map
> @@ -299,6 +299,10 @@ EXPERIMENTAL {
>  	rte_flow_action_handle_query_update;
>  	rte_flow_async_action_handle_query_update;
>  	rte_flow_async_create_by_index;
> +
> +	# added in 23.07
> +	rte_eth_recycle_mbufs;
> +	rte_eth_recycle_rx_queue_info_get;
>  };
>
>  INTERNAL {
> --
> 2.25.1
>
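
For readers following the thread, here is a minimal application-level sketch of how the
two new APIs are meant to be combined in a forwarding loop. The port/queue identifiers,
the helper function name, the burst size and the placement of the recycle call before
rte_eth_rx_burst() are illustrative assumptions, not part of the patch; only the API
signatures come from the diff above.

/*
 * Illustrative sketch only: pairs one Rx queue with one Tx queue once, up
 * front, as the API documentation above requires, then recycles mbufs on
 * every loop iteration.
 */
#include <rte_ethdev.h>
#include <rte_mbuf.h>

#define BURST_SIZE 32

static void
fwd_loop_with_recycle(uint16_t rx_port, uint16_t rx_queue,
		uint16_t tx_port, uint16_t tx_queue)
{
	struct rte_eth_recycle_rxq_info rxq_info;
	struct rte_mbuf *pkts[BURST_SIZE];
	uint16_t nb_rx, nb_tx;

	/* Retrieve the Rx queue mbuf ring information once, before the loop. */
	if (rte_eth_recycle_rx_queue_info_get(rx_port, rx_queue, &rxq_info) != 0)
		return; /* recycling not supported for this queue */

	for (;;) {
		/* Move used mbufs from the Tx mbuf ring straight into the Rx
		 * mbuf ring, bypassing the mempool put/get path. The call
		 * returns 0 if nothing could be recycled, in which case the
		 * Rx path falls back to the mempool as usual.
		 */
		rte_eth_recycle_mbufs(rx_port, rx_queue, tx_port, tx_queue,
				&rxq_info);

		nb_rx = rte_eth_rx_burst(rx_port, rx_queue, pkts, BURST_SIZE);
		if (nb_rx == 0)
			continue;

		nb_tx = rte_eth_tx_burst(tx_port, tx_queue, pkts, nb_rx);

		/* Packets not accepted by the Tx queue are freed back to the
		 * mempool in the normal way.
		 */
		while (nb_tx < nb_rx)
			rte_pktmbuf_free(pkts[nb_tx++]);
	}
}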