> There is confusion about setting the max Rx packet length; this patch
> aims to clarify it.
>
> 'rte_eth_dev_configure()' API accepts max Rx packet size via
> 'uint32_t max_rx_pkt_len' field of the config struct 'struct
> rte_eth_conf'.
>
> Also 'rte_eth_dev_set_mtu()' API can be used to set the MTU, and the
> result is stored in '(struct rte_eth_dev)->data->mtu'.
>
> These two APIs are related but they work in a disconnected way: they
> store the set values in different variables, which makes it hard to
> figure out which one to use. Also, having two different methods for a
> related functionality is confusing for the users.
>
> Other issues causing confusion are:
> * maximum transmission unit (MTU) is payload of the Ethernet frame. And
> 'max_rx_pkt_len' is the size of the Ethernet frame. Difference is
> Ethernet frame overhead, and this overhead may be different from
> device to device based on what device supports, like VLAN and QinQ.
> * 'max_rx_pkt_len' is only valid when application requested jumbo frame,
> which adds additional confusion, and some APIs and PMDs already
> discard this documented behavior.
> * For the jumbo frame enabled case, 'max_rx_pkt_len' is a mandatory
> field, which adds configuration complexity for the application.
>
> As a solution, both APIs get the MTU as a parameter, and both save the
> result in the same variable '(struct rte_eth_dev)->data->mtu'. For this,
> 'max_rx_pkt_len' is renamed to 'mtu', and it is always valid independent
> of jumbo frame.
>
> For 'rte_eth_dev_configure()', 'dev->data->dev_conf.rxmode.mtu' is the
> user request; it should be used only within the configure function and
> the result should be stored in '(struct rte_eth_dev)->data->mtu'. After
> that point both the application and the PMD use the MTU from this variable.
>
> When application doesn't provide an MTU during 'rte_eth_dev_configure()'
> default 'RTE_ETHER_MTU' value is used.
>
> Additional clarification done on scattered Rx configuration, in
> relation to MTU and Rx buffer size.
> MTU is used to configure the device for physical Rx/Tx size limitation,
> Rx buffer is where to store Rx packets, many PMDs use mbuf data buffer
> size as Rx buffer size.
> PMDs compare MTU against Rx buffer size to decide enabling scattered Rx
> or not. If scattered Rx is not supported by device, MTU bigger than Rx
> buffer size should fail.
LGTM in general, one question below.
...
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index daf5ca924221..4d0584af52e3 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -1324,6 +1324,19 @@ eth_dev_validate_offloads(uint16_t port_id, uint64_t
> req_offloads,
> return ret;
> }
>
> +static uint16_t
> +eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu)
> +{
> + uint16_t overhead_len;
> +
> + if (max_mtu != UINT16_MAX && max_rx_pktlen > max_mtu)
> + overhead_len = max_rx_pktlen - max_mtu;
In theory there could be an overflow here, though I do realize that in
practice it is an unlikely situation.
Anyway, why uint16_t, why not uint32_t for all variables here?
Just so we don't have to worry about such things.
> + else
> + overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
> +
> + return overhead_len;
> +}
> +
> int
> rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
> const struct rte_eth_conf *dev_conf)
> @@ -1331,6 +1344,7 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t
> nb_rx_q, uint16_t nb_tx_q,
> struct rte_eth_dev *dev;
> struct rte_eth_dev_info dev_info;
> struct rte_eth_conf orig_conf;
> + uint32_t max_rx_pktlen;
> uint16_t overhead_len;
> int diag;
> int ret;
> @@ -1381,11 +1395,8 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t
> nb_rx_q, uint16_t nb_tx_q,
> goto rollback;
>
> /* Get the real Ethernet overhead length */
> - if (dev_info.max_mtu != UINT16_MAX &&
> - dev_info.max_rx_pktlen > dev_info.max_mtu)
> - overhead_len = dev_info.max_rx_pktlen - dev_info.max_mtu;
> - else
> - overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
> + overhead_len = eth_dev_get_overhead_len(dev_info.max_rx_pktlen,
> + dev_info.max_mtu);
>
> /* If number of queues specified by application for both Rx and Tx is
> * zero, use driver preferred values. This cannot be done individually
> @@ -1454,49 +1465,45 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t
> nb_rx_q, uint16_t nb_tx_q,
> }
>
> /*
> - * If jumbo frames are enabled, check that the maximum RX packet
> - * length is supported by the configured device.
> + * Check that the maximum RX packet length is supported by the
> + * configured device.
> */
> - if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
> - if (dev_conf->rxmode.max_rx_pkt_len > dev_info.max_rx_pktlen) {
> - RTE_ETHDEV_LOG(ERR,
> - "Ethdev port_id=%u max_rx_pkt_len %u > max
> valid value %u\n",
> - port_id, dev_conf->rxmode.max_rx_pkt_len,
> - dev_info.max_rx_pktlen);
> - ret = -EINVAL;
> - goto rollback;
> - } else if (dev_conf->rxmode.max_rx_pkt_len < RTE_ETHER_MIN_LEN)
> {
> - RTE_ETHDEV_LOG(ERR,
> - "Ethdev port_id=%u max_rx_pkt_len %u < min
> valid value %u\n",
> - port_id, dev_conf->rxmode.max_rx_pkt_len,
> - (unsigned int)RTE_ETHER_MIN_LEN);
> - ret = -EINVAL;
> - goto rollback;
> - }
> + if (dev_conf->rxmode.mtu == 0)
> + dev->data->dev_conf.rxmode.mtu = RTE_ETHER_MTU;
> + max_rx_pktlen = dev->data->dev_conf.rxmode.mtu + overhead_len;
> + if (max_rx_pktlen > dev_info.max_rx_pktlen) {
> + RTE_ETHDEV_LOG(ERR,
> + "Ethdev port_id=%u max_rx_pktlen %u > max valid value
> %u\n",
> + port_id, max_rx_pktlen, dev_info.max_rx_pktlen);
> + ret = -EINVAL;
> + goto rollback;
> + } else if (max_rx_pktlen < RTE_ETHER_MIN_LEN) {
> + RTE_ETHDEV_LOG(ERR,
> + "Ethdev port_id=%u max_rx_pktlen %u < min valid value
> %u\n",
> + port_id, max_rx_pktlen, RTE_ETHER_MIN_LEN);
> + ret = -EINVAL;
> + goto rollback;
> + }
>
> - /* Scale the MTU size to adapt max_rx_pkt_len */
> - dev->data->mtu = dev->data->dev_conf.rxmode.max_rx_pkt_len -
> - overhead_len;
> - } else {
> - uint16_t pktlen = dev_conf->rxmode.max_rx_pkt_len;
> - if (pktlen < RTE_ETHER_MIN_MTU + overhead_len ||
> - pktlen > RTE_ETHER_MTU + overhead_len)
> + if ((dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) {
> + if (dev->data->dev_conf.rxmode.mtu < RTE_ETHER_MIN_MTU ||
> + dev->data->dev_conf.rxmode.mtu > RTE_ETHER_MTU)
> /* Use default value */
> - dev->data->dev_conf.rxmode.max_rx_pkt_len =
> - RTE_ETHER_MTU + overhead_len;
> + dev->data->dev_conf.rxmode.mtu = RTE_ETHER_MTU;
> }
>
> + dev->data->mtu = dev->data->dev_conf.rxmode.mtu;
> +
> /*
> * If LRO is enabled, check that the maximum aggregated packet
> * size is supported by the configured device.
> */
> if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_TCP_LRO) {
> if (dev_conf->rxmode.max_lro_pkt_size == 0)
> - dev->data->dev_conf.rxmode.max_lro_pkt_size =
> - dev->data->dev_conf.rxmode.max_rx_pkt_len;
> + dev->data->dev_conf.rxmode.max_lro_pkt_size =
> max_rx_pktlen;
> ret = eth_dev_check_lro_pkt_size(port_id,
> dev->data->dev_conf.rxmode.max_lro_pkt_size,
> - dev->data->dev_conf.rxmode.max_rx_pkt_len,
> + max_rx_pktlen,
> dev_info.max_lro_pkt_size);
> if (ret != 0)
> goto rollback;
> @@ -2156,13 +2163,20 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t
> rx_queue_id,
> * If LRO is enabled, check that the maximum aggregated packet
> * size is supported by the configured device.
> */
> + /* Get the real Ethernet overhead length */
> if (local_conf.offloads & DEV_RX_OFFLOAD_TCP_LRO) {
> + uint16_t overhead_len;
> + uint32_t max_rx_pktlen;
> + int ret;
> +
> + overhead_len = eth_dev_get_overhead_len(dev_info.max_rx_pktlen,
> + dev_info.max_mtu);
> + max_rx_pktlen = dev->data->mtu + overhead_len;
> if (dev->data->dev_conf.rxmode.max_lro_pkt_size == 0)
> - dev->data->dev_conf.rxmode.max_lro_pkt_size =
> - dev->data->dev_conf.rxmode.max_rx_pkt_len;
> - int ret = eth_dev_check_lro_pkt_size(port_id,
> + dev->data->dev_conf.rxmode.max_lro_pkt_size =
> max_rx_pktlen;
> + ret = eth_dev_check_lro_pkt_size(port_id,
> dev->data->dev_conf.rxmode.max_lro_pkt_size,
> - dev->data->dev_conf.rxmode.max_rx_pkt_len,
> + max_rx_pktlen,
> dev_info.max_lro_pkt_size);
> if (ret != 0)
> return ret;
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index afdc53b674cc..9fba2bd73c84 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -416,7 +416,7 @@ enum rte_eth_tx_mq_mode {
> struct rte_eth_rxmode {
> /** The multi-queue packet distribution mode to be used, e.g. RSS. */
> enum rte_eth_rx_mq_mode mq_mode;
> - uint32_t max_rx_pkt_len; /**< Only used if JUMBO_FRAME enabled. */
> + uint32_t mtu; /**< Requested MTU. */
> /** Maximum allowed size of LRO aggregated packet. */
> uint32_t max_lro_pkt_size;
> uint16_t split_hdr_size; /**< hdr buf size (header_split enabled).*/
> diff --git a/lib/ethdev/rte_ethdev_trace.h b/lib/ethdev/rte_ethdev_trace.h
> index 0036bda7465c..1491c815c312 100644
> --- a/lib/ethdev/rte_ethdev_trace.h
> +++ b/lib/ethdev/rte_ethdev_trace.h
> @@ -28,7 +28,7 @@ RTE_TRACE_POINT(
> rte_trace_point_emit_u16(nb_tx_q);
> rte_trace_point_emit_u32(dev_conf->link_speeds);
> rte_trace_point_emit_u32(dev_conf->rxmode.mq_mode);
> - rte_trace_point_emit_u32(dev_conf->rxmode.max_rx_pkt_len);
> + rte_trace_point_emit_u32(dev_conf->rxmode.mtu);
> rte_trace_point_emit_u64(dev_conf->rxmode.offloads);
> rte_trace_point_emit_u32(dev_conf->txmode.mq_mode);
> rte_trace_point_emit_u64(dev_conf->txmode.offloads);
> --
> 2.31.1