> -----邮件原件----- > 发件人: Konstantin Ananyev <konstantin.v.anan...@yandex.ru> > 发送时间: Wednesday, October 12, 2022 6:21 AM > 收件人: Feifei Wang <feifei.wa...@arm.com>; tho...@monjalon.net; > Ferruh Yigit <ferruh.yi...@xilinx.com>; Andrew Rybchenko > <andrew.rybche...@oktetlabs.ru>; Ray Kinsella <m...@ashroe.eu> > 抄送: dev@dpdk.org; nd <n...@arm.com>; Honnappa Nagarahalli > <honnappa.nagaraha...@arm.com>; Ruifeng Wang > <ruifeng.w...@arm.com> > 主题: Re: 回复: [PATCH v2 1/3] ethdev: add API for direct rearm mode > > > > >>> Add API for enabling direct rearm mode and for mapping RX and TX > >>> queues. Currently, the API supports 1:1(txq : rxq) mapping. > >>> > >>> Furthermore, to avoid Rx load Tx data directly, add API called > >>> 'rte_eth_txq_data_get' to get Tx sw_ring and its information. > >>> > >>> Suggested-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com> > >>> Suggested-by: Ruifeng Wang <ruifeng.w...@arm.com> > >>> Signed-off-by: Feifei Wang <feifei.wa...@arm.com> > >>> Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com> > >>> Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com> > >>> --- > >>> lib/ethdev/ethdev_driver.h | 9 ++++ > >>> lib/ethdev/ethdev_private.c | 1 + > >>> lib/ethdev/rte_ethdev.c | 37 ++++++++++++++ > >>> lib/ethdev/rte_ethdev.h | 95 > >> ++++++++++++++++++++++++++++++++++++ > >>> lib/ethdev/rte_ethdev_core.h | 5 ++ > >>> lib/ethdev/version.map | 4 ++ > >>> 6 files changed, 151 insertions(+) > >>> > >>> diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h > >>> index 47a55a419e..14f52907c1 100644 > >>> --- a/lib/ethdev/ethdev_driver.h > >>> +++ b/lib/ethdev/ethdev_driver.h > >>> @@ -58,6 +58,8 @@ struct rte_eth_dev { > >>> eth_rx_descriptor_status_t rx_descriptor_status; > >>> /** Check the status of a Tx descriptor */ > >>> eth_tx_descriptor_status_t tx_descriptor_status; > >>> + /** Use Tx mbufs for Rx to rearm */ > >>> + eth_rx_direct_rearm_t rx_direct_rearm; > >>> > >>> /** > >>> * Device data that is shared between primary and secondary > >>> processes @@ -486,6 +488,11 @@ typedef int > >> (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev, > >>> typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev, > >>> uint16_t rx_queue_id); > >>> > >>> +/**< @internal Get Tx information of a transmit queue of an > >>> +Ethernet device. */ typedef void (*eth_txq_data_get_t)(struct > rte_eth_dev *dev, > >>> + uint16_t tx_queue_id, > >>> + struct rte_eth_txq_data *txq_data); > >>> + > >>> /** @internal Release memory resources allocated by given Rx/Tx > queue. > >> */ > >>> typedef void (*eth_queue_release_t)(struct rte_eth_dev *dev, > >>> uint16_t queue_id); > >>> @@ -1138,6 +1145,8 @@ struct eth_dev_ops { > >>> eth_rxq_info_get_t rxq_info_get; > >>> /** Retrieve Tx queue information */ > >>> eth_txq_info_get_t txq_info_get; > >>> + /** Get the address where Tx data is stored */ > >>> + eth_txq_data_get_t txq_data_get; > >>> eth_burst_mode_get_t rx_burst_mode_get; /**< Get Rx burst > >> mode */ > >>> eth_burst_mode_get_t tx_burst_mode_get; /**< Get Tx burst > >> mode */ > >>> eth_fw_version_get_t fw_version_get; /**< Get firmware > >> version */ > >>> diff --git a/lib/ethdev/ethdev_private.c > >>> b/lib/ethdev/ethdev_private.c index 48090c879a..bfe16c7d77 100644 > >>> --- a/lib/ethdev/ethdev_private.c > >>> +++ b/lib/ethdev/ethdev_private.c > >>> @@ -276,6 +276,7 @@ eth_dev_fp_ops_setup(struct rte_eth_fp_ops > >> *fpo, > >>> fpo->rx_queue_count = dev->rx_queue_count; > >>> fpo->rx_descriptor_status = dev->rx_descriptor_status; > >>> fpo->tx_descriptor_status = dev->tx_descriptor_status; > >>> + fpo->rx_direct_rearm = dev->rx_direct_rearm; > >>> > >>> fpo->rxq.data = dev->data->rx_queues; > >>> fpo->rxq.clbk = (void **)(uintptr_t)dev->post_rx_burst_cbs; > >>> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index > >>> 0c2c1088c0..0dccec2e4b 100644 > >>> --- a/lib/ethdev/rte_ethdev.c > >>> +++ b/lib/ethdev/rte_ethdev.c > >>> @@ -1648,6 +1648,43 @@ rte_eth_dev_is_removed(uint16_t port_id) > >>> return ret; > >>> } > >>> > >>> +int > >>> +rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t queue_id, > >>> + struct rte_eth_txq_data *txq_data) { > >>> + struct rte_eth_dev *dev; > >>> + > >>> + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); > >>> + dev = &rte_eth_devices[port_id]; > >>> + > >>> + if (queue_id >= dev->data->nb_tx_queues) { > >>> + RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u\n", > >> queue_id); > >>> + return -EINVAL; > >>> + } > >>> + > >>> + if (txq_data == NULL) { > >>> + RTE_ETHDEV_LOG(ERR, "Cannot get ethdev port %u Tx > >> queue %u data to NULL\n", > >>> + port_id, queue_id); > >>> + return -EINVAL; > >>> + } > >>> + > >>> + if (dev->data->tx_queues == NULL || > >>> + dev->data->tx_queues[queue_id] == NULL) { > >>> + RTE_ETHDEV_LOG(ERR, > >>> + "Tx queue %"PRIu16" of device with port_id=%" > >>> + PRIu16" has not been setup\n", > >>> + queue_id, port_id); > >>> + return -EINVAL; > >>> + } > >>> + > >>> + if (*dev->dev_ops->txq_data_get == NULL) > >>> + return -ENOTSUP; > >>> + > >>> + dev->dev_ops->txq_data_get(dev, queue_id, txq_data); > >>> + > >>> + return 0; > >>> +} > >>> + > >>> static int > >>> rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split > *rx_seg, > >>> uint16_t n_seg, uint32_t *mbp_buf_size, > >>> diff --git > >>> a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index > >>> 2e783536c1..daf7f05d62 100644 > >>> --- a/lib/ethdev/rte_ethdev.h > >>> +++ b/lib/ethdev/rte_ethdev.h > >>> @@ -1949,6 +1949,23 @@ struct rte_eth_txq_info { > >>> uint8_t queue_state; /**< one of RTE_ETH_QUEUE_STATE_*. > >>> */ > >>> } __rte_cache_min_aligned; > >>> > >>> +/** > >>> + * @internal > >>> + * Structure used to hold pointers to internal ethdev Tx data. > >>> + * The main purpose is to load and store Tx queue data in direct > >>> +rearm > >> mode. > >>> + */ > >>> +struct rte_eth_txq_data { > >>> + uint64_t *offloads; > >>> + void *tx_sw_ring; > >>> + volatile void *tx_ring; > >>> + uint16_t *tx_next_dd; > >>> + uint16_t *nb_tx_free; > >>> + uint16_t nb_tx_desc; > >>> + uint16_t tx_rs_thresh; > >>> + uint16_t tx_free_thresh; > >>> +} __rte_cache_min_aligned; > >>> + > >> > >> first of all it is not clear why this struct has to be in public > >> header, why it can't be in on of ethdev 'private' headers. > >> Second it looks like a snippet from private txq fields for some Intel > >> (and alike) PMDs (i40e, ice, etc.). > >> How it supposed to to be universal and be applicable for any PMD that > >> decides to implement this new API? > >> > >> > >>> /* Generic Burst mode flag definition, values can be ORed. */ > >>> > >>> /** > >>> @@ -4718,6 +4735,27 @@ int rte_eth_remove_rx_callback(uint16_t > >> port_id, uint16_t queue_id, > >>> int rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id, > >>> const struct rte_eth_rxtx_callback *user_cb); > >>> > >>> +/** > >>> + * Get the address which Tx data is stored. > >>> + * > >>> + * @param port_id > >>> + * The port identifier of the Ethernet device. > >>> + * @param queue_id > >>> + * The Tx queue on the Ethernet device for which information > >>> + * will be retrieved. > >>> + * @param txq_data > >>> + * A pointer to a structure of type *rte_eth_txq_data* to be filled. > >>> + * > >>> + * @return > >>> + * - 0: Success > >>> + * - -ENODEV: If *port_id* is invalid. > >>> + * - -ENOTSUP: routine is not supported by the device PMD. > >>> + * - -EINVAL: The queue_id is out of range. > >>> + */ > >>> +__rte_experimental > >>> +int rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t queue_id, > >>> + struct rte_eth_txq_data *txq_data); > >>> + > >>> /** > >>> * Retrieve information about given port's Rx queue. > >>> * > >>> @@ -6209,6 +6247,63 @@ rte_eth_tx_buffer(uint16_t port_id, uint16_t > >> queue_id, > >>> return rte_eth_tx_buffer_flush(port_id, queue_id, buffer); > >>> } > >>> > >>> +/** > >>> + * @warning > >>> + * @b EXPERIMENTAL: this API may change, or be removed, without > >>> +prior notice > >>> + * > >>> + * Put Tx buffers into Rx sw-ring and rearm descs. > >>> + * > >>> + * @param port_id > >>> + * Port identifying the receive side. > >>> + * @param queue_id > >>> + * The index of the transmit queue identifying the receive side. > >>> + * The value must be in the range [0, nb_rx_queue - 1] previously > >> supplied > >>> + * to rte_eth_dev_configure(). > >>> + * @param txq_data > >>> + * A pointer to a structure of type *rte_eth_txq_data* to be filled. > >>> + * @return > >>> + * The number of direct-rearmed buffers. > >>> + */ > >>> +__rte_experimental > >>> +static __rte_always_inline uint16_t > >>> +rte_eth_rx_direct_rearm(uint16_t port_id, uint16_t queue_id, > >>> + struct rte_eth_txq_data *txq_data) { > >>> + uint16_t nb_rearm; > >>> + struct rte_eth_fp_ops *p; > >>> + void *qd; > >>> + > >>> +#ifdef RTE_ETHDEV_DEBUG_RX > >>> + if (port_id >= RTE_MAX_ETHPORTS || > >>> + queue_id >= RTE_MAX_QUEUES_PER_PORT) { > >>> + RTE_ETHDEV_LOG(ERR, > >>> + "Invalid port_id=%u or queue_id=%u\n", > >>> + port_id, queue_id); > >>> + return 0; > >>> + } > >>> +#endif > >>> + > >>> + p = &rte_eth_fp_ops[port_id]; > >>> + qd = p->rxq.data[queue_id]; > >>> + > >>> +#ifdef RTE_ETHDEV_DEBUG_RX > >>> + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0); > >>> + > >>> + if (qd == NULL) { > >>> + RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for > >> port_id=%u\n", > >>> + queue_id, port_id); > >>> + return 0; > >>> + } > >>> + > >>> + if (!p->rx_direct_rearm) > >> > >> This check should be done always (unconditionally). > >> it is not a mandatory function for the driver (it can safely skip to > implement it). > >> > >>> + return -ENOTSUP; > >> > >> This function returns uint16_t, why signed integers here? > >> > >> > >>> +#endif > >>> + > >>> + nb_rearm = p->rx_direct_rearm(qd, txq_data); > >> > >> So rx_direct_rearm() function knows how to extract data from TX queue? > >> As I understand that is possible only in one case: > >> rx_direct_rearm() has full knowledge and acess of txq internals, etc. > >> That means that rxq and txq have to belong to the same driver and > >> device type. > >> > > Thanks for the comments, and I have some questions for this. > > > >> First of all, I still think it is not the best design choice. > >> If we going ahead with introducing this feature, it better be as > >> generic as possible. > >> Plus it mixes TX and RX code-paths together, while it would be much > >> better to to keep them independent as they are right now. > >> > >> Another thing with such approach - even for the same PMD, both TXQ > >> and RXQ can have different internal data format and behavior logic > >> (depending on port/queue configuration). > > 1. Here TXQ and RXQ have different internal format means the queue > > type and descs can be different, right? If I understand correctly, > > based on your first strategy, is it means we will need different > > 'rearm_func' for different queue type in the same PMD? > > Yes, I think so. > If let say we have some PMD where depending on the config, there cpuld be > 2 different RXQ formats: rxq_a and rxq_b, and 2 different txq formats: txq_c, > txq_d. > Then assuming PMD would like to support direct-rearm mode for all four > combinations, it needs 4 different rearm functions: > > rearm_txq_c_to_rxq_a() > rearm_txq_c_to_rxq_b() > rearm_txq_d_to_rxq_a() > rearm_txq_d_to_rxq_b() > Thank you for your detailed explanation, I can understand this. > > > > >> So rx_direct_rearm() function selection have to be done based on both > >> RXQ and TXQ config. > >> So instead of rte_eth_tx_queue_data_get(), you'll probably need: > >> eth_rx_direct_rearm_t rte_eth_get_rx_direct_rearm_func(rx_port, > >> rx_queue, tx_port, tx_queue); > >> Then, it will be user responsibility to store it somewhere and call > periodically: > >> > >> control_path: > >> ... > >> rearm_func = rte_eth_get_rx_direct_rearm_func(rxport, rxqueue, > >> txport, txqueue); > >> data-path: > >> while(...) { > >> rearm_func(rxport, txport, rxqueue, txqueue); > >> rte_eth_rx_burst(rxport, rxqueue, ....); > >> rte_eth_tx_burst(txport, txqueue, ....); > >> } > >> > >> > >> In that case there seems absolutely no point to introduce struct > >> rte_eth_txq_data. rx_direct_rearm() accesses TXQ private data > >> directly anyway. > > 2. This is a very good proposal and it will be our first choice. > > Before working on it, I have a few questions about how to implement > 'rearm_func'. > > Like you say above, mixed Rx and Tx path code in 'rearm_func' means > > the hard-code is mixed like: > > rearm_func(...) { > > ... > > txep = &txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]; > > for (...) { > > rxep[i].mbuf = txep[i].mbuf; > > mb0 = txep[i].mbuf; > > paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM; > > dma_addr0 = vdupq_n_u64(paddr); > > vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0); > > } > > } > > Is my understanding is right? > > > Sorry, I don't understand the question. > Can you probably elaborate a bit?
Sorry for my unclear expression. I mean if we need two func which contains tx and rx paths code respectively in rearm_func, like: rearm_func(...) { rte_tx_fill_sw_ring; rte_rx_rearm_descs; } Or just mixed tx and rx path code like I said before. I prefer 'rx and tx hard code mixed', because from the performance perspective, this can reduce the cost of function calls. > > > > >> > >> Another way - make rte_eth_txq_data totally opaque and allow PMD to > >> store there some data that will help it to distinguish expected TXQ format. > >> That will allow PMD to keep rx_direct_rearm() the same for all > >> supported TXQ formats (it will make decision internally based on data > >> stored in txq_data). > >> Though in that case you'll probably need one more dev-op to free > txq_data. > >