Hi,

> -----Original Message-----
> From: Vincent Jardin <[email protected]>
> Sent: Friday, March 13, 2026 12:01 AM
> To: [email protected]
> Cc: Raslan Darawsheh <[email protected]>; NBU-Contact-Thomas Monjalon
> (EXTERNAL) <[email protected]>; [email protected];
> Dariusz Sosnowski <[email protected]>; Slava Ovsiienko
> <[email protected]>; Bing Zhao <[email protected]>; Ori Kam
> <[email protected]>; Suanming Mou <[email protected]>; Matan Azrad
> <[email protected]>; [email protected]; Vincent Jardin
> <[email protected]>
> Subject: [PATCH v3 06/9] net/mlx5: add burst pacing devargs
> 
> Expose burst_upper_bound and typical_packet_size from the PRM
> set_pp_rate_limit_context as devargs:
> - tx_burst_bound=<bytes>: max burst before rate evaluation kicks in
> - tx_typical_pkt_sz=<bytes>: typical packet size for accuracy
> 
> These parameters apply to both per-queue rate limiting
> (rte_eth_set_queue_rate_limit) and Clock Queue pacing (tx_pp).

Clock Queue is a special facility to overcome ConnectX-6 Dx hardware limitations
and handle send scheduling. It uses WQE rate pacing and does not need
tx_burst_bound and tx_typical_pkt_sz to be set. Please update the commit
message and remove the update of mlx5_txpp_alloc_pp_index().

> 
> Values are validated against HCA capabilities (packet_pacing_burst_bound and
> packet_pacing_typical_size).
> If the HW does not support them, a warning is logged and the value is silently
> zeroed. Test mode still overrides both values.
> 
> Shared context mismatch checks ensure all ports on the same device use the
> same burst parameters.
> 
> Supported hardware:
> - ConnectX-6 Dx: burst_upper_bound and typical_packet_size
>   reported via packet_pacing_burst_bound / packet_pacing_typical_size
>   QoS capability bits
> - ConnectX-7/8: full support for both parameters
> - BlueField-2/3: same capabilities as host-side ConnectX
> 
> Not supported:
> - ConnectX-5: may not report burst_bound or typical_size caps
> - ConnectX-4 Lx and earlier: no packet_pacing at all
> 
> Signed-off-by: Vincent Jardin <[email protected]>
> ---
>  doc/guides/nics/mlx5.rst     | 16 ++++++++++++++
>  drivers/net/mlx5/mlx5.c      | 42 ++++++++++++++++++++++++++++++++++++
>  drivers/net/mlx5/mlx5.h      |  2 ++
>  drivers/net/mlx5/mlx5_txpp.c | 12 +++++++++++
>  4 files changed, 72 insertions(+)
> 
> diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index
> 5b097dbc90..2507fae846 100644
> --- a/doc/guides/nics/mlx5.rst
> +++ b/doc/guides/nics/mlx5.rst
> @@ -580,6 +580,22 @@ for an additional list of options shared with other
> mlx5 drivers.
>    (with ``tx_pp``) and ConnectX-7+ (wait-on-time) scheduling modes.
>    The default value is zero.
> 
> +- ``tx_burst_bound`` parameter [int]
> +
> +  Specifies the burst upper bound in bytes for packet pacing rate evaluation.
> +  When set, the hardware considers this burst size when enforcing the
> + configured  rate limit. Only effective when the HCA reports
> + ``packet_pacing_burst_bound``  capability. Applies to both per-queue
> + rate limiting
> +  (``rte_eth_set_queue_rate_limit()``) and Clock Queue pacing (``tx_pp``).
> +  The default value is zero (hardware default).
> +
> +- ``tx_typical_pkt_sz`` parameter [int]
> +
> +  Specifies the typical packet size in bytes for packet pacing rate
> + accuracy  improvement. Only effective when the HCA reports
> + ``packet_pacing_typical_size`` capability. Applies to both per-queue
> + rate  limiting and Clock Queue pacing. The default value is zero (hardware
> default).
> +
>  - ``tx_vec_en`` parameter [int]
> 
>    A nonzero value enables Tx vector with ConnectX-5 NICs and above.
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> c390406ac7..f399e0d5c9 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -119,6 +119,18 @@
>   */
>  #define MLX5_TX_SKEW "tx_skew"
> 
> +/*
> + * Device parameter to specify burst upper bound in bytes
> + * for packet pacing rate evaluation.
> + */
> +#define MLX5_TX_BURST_BOUND "tx_burst_bound"
> +
> +/*
> + * Device parameter to specify typical packet size in bytes
> + * for packet pacing rate accuracy improvement.
> + */
> +#define MLX5_TX_TYPICAL_PKT_SZ "tx_typical_pkt_sz"
> +
>  /*
>   * Device parameter to enable hardware Tx vector.
>   * Deprecated, ignored (no vectorized Tx routines anymore).
> @@ -1405,6 +1417,10 @@ mlx5_dev_args_check_handler(const char *key,
> const char *val, void *opaque)
>               config->tx_pp = tmp;
>       } else if (strcmp(MLX5_TX_SKEW, key) == 0) {
>               config->tx_skew = tmp;
> +     } else if (strcmp(MLX5_TX_BURST_BOUND, key) == 0) {
> +             config->tx_burst_bound = tmp;
> +     } else if (strcmp(MLX5_TX_TYPICAL_PKT_SZ, key) == 0) {
> +             config->tx_typical_pkt_sz = tmp;
>       } else if (strcmp(MLX5_L3_VXLAN_EN, key) == 0) {
>               config->l3_vxlan_en = !!tmp;
>       } else if (strcmp(MLX5_VF_NL_EN, key) == 0) { @@ -1518,8 +1534,10
> @@ mlx5_shared_dev_ctx_args_config(struct mlx5_dev_ctx_shared *sh,
>                               struct mlx5_sh_config *config)
>  {
>       const char **params = (const char *[]){
> +             MLX5_TX_BURST_BOUND,
>               MLX5_TX_PP,
>               MLX5_TX_SKEW,
> +             MLX5_TX_TYPICAL_PKT_SZ,
>               MLX5_L3_VXLAN_EN,
>               MLX5_VF_NL_EN,
>               MLX5_DV_ESW_EN,
> @@ -1626,6 +1644,18 @@ mlx5_shared_dev_ctx_args_config(struct
> mlx5_dev_ctx_shared *sh,
>               DRV_LOG(WARNING,
>                       "\"tx_skew\" doesn't affect without \"tx_pp\".");
>       }
> +     if (config->tx_burst_bound &&
> +         !sh->cdev->config.hca_attr.qos.packet_pacing_burst_bound) {
> +             DRV_LOG(WARNING,
> +                     "HW does not support burst_upper_bound,
> ignoring.");
> +             config->tx_burst_bound = 0;
> +     }
> +     if (config->tx_typical_pkt_sz &&
> +         !sh->cdev->config.hca_attr.qos.packet_pacing_typical_size) {
> +             DRV_LOG(WARNING,
> +                     "HW does not support typical_packet_size, ignoring.");
> +             config->tx_typical_pkt_sz = 0;
> +     }
>       /* Check for LRO support. */
>       if (mlx5_devx_obj_ops_en(sh) && sh->cdev->config.hca_attr.lro_cap) {
>               /* TBD check tunnel lro caps. */
> @@ -3260,6 +3290,18 @@ mlx5_probe_again_args_validate(struct
> mlx5_common_device *cdev,
>                       sh->ibdev_name);
>               goto error;
>       }
> +     if (sh->config.tx_burst_bound != config->tx_burst_bound) {
> +             DRV_LOG(ERR, "\"tx_burst_bound\" "
> +                     "configuration mismatch for shared %s context.",
> +                     sh->ibdev_name);
> +             goto error;
> +     }
> +     if (sh->config.tx_typical_pkt_sz != config->tx_typical_pkt_sz) {
> +             DRV_LOG(ERR, "\"tx_typical_pkt_sz\" "
> +                     "configuration mismatch for shared %s context.",
> +                     sh->ibdev_name);
> +             goto error;
> +     }
>       if (sh->config.txq_mem_algn != config->txq_mem_algn) {
>               DRV_LOG(ERR, "\"TxQ memory alignment\" "
>                       "configuration mismatch for shared %s context. %u -
> %u", diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> c48c3072d1..a8d71482ac 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -382,6 +382,8 @@ struct mlx5_port_config {  struct mlx5_sh_config {
>       int tx_pp; /* Timestamp scheduling granularity in nanoseconds. */
>       int tx_skew; /* Tx scheduling skew between WQE and data on wire. */
> +     uint32_t tx_burst_bound; /* Burst upper bound in bytes, 0 = default. */
> +     uint32_t tx_typical_pkt_sz; /* Typical packet size in bytes, 0 =
> +default. */
>       uint32_t reclaim_mode:2; /* Memory reclaim mode. */
>       uint32_t dv_esw_en:1; /* Enable E-Switch DV flow. */
>       /* Enable DV flow. 1 means SW steering, 2 means HW steering. */ diff --
> git a/drivers/net/mlx5/mlx5_txpp.c b/drivers/net/mlx5/mlx5_txpp.c index
> 0a883b0a94..756a772cc5 100644
> --- a/drivers/net/mlx5/mlx5_txpp.c
> +++ b/drivers/net/mlx5/mlx5_txpp.c

Please remove the changes to mlx5_txpp_alloc_pp_index() in the hunk below.

> @@ -88,6 +88,12 @@ mlx5_txpp_alloc_pp_index(struct mlx5_dev_ctx_shared
> *sh)
>       rate = NS_PER_S / sh->txpp.tick;
>       if (rate * sh->txpp.tick != NS_PER_S)
>               DRV_LOG(WARNING, "Packet pacing frequency is not
> precise.");
> +     if (sh->config.tx_burst_bound)
> +             MLX5_SET(set_pp_rate_limit_context, &pp,
> +                      burst_upper_bound, sh->config.tx_burst_bound);
> +     if (sh->config.tx_typical_pkt_sz)
> +             MLX5_SET(set_pp_rate_limit_context, &pp,
> +                      typical_packet_size, sh->config.tx_typical_pkt_sz);
>       if (sh->txpp.test) {
>               uint32_t len;
> 
> @@ -172,6 +178,12 @@ mlx5_txq_alloc_pp_rate_limit(struct
> mlx5_dev_ctx_shared *sh,
>       memset(&pp, 0, sizeof(pp));
>       MLX5_SET(set_pp_rate_limit_context, &pp, rate_limit,
> (uint32_t)rate_kbps);
>       MLX5_SET(set_pp_rate_limit_context, &pp, rate_mode,
> MLX5_DATA_RATE);
> +     if (sh->config.tx_burst_bound)
> +             MLX5_SET(set_pp_rate_limit_context, &pp,
> +                      burst_upper_bound, sh->config.tx_burst_bound);
> +     if (sh->config.tx_typical_pkt_sz)
> +             MLX5_SET(set_pp_rate_limit_context, &pp,
> +                      typical_packet_size, sh->config.tx_typical_pkt_sz);
>       rl->pp = mlx5_glue->dv_alloc_pp(sh->cdev->ctx, sizeof(pp), &pp, 0);
>       if (rl->pp == NULL) {
>               DRV_LOG(ERR, "Failed to allocate PP index for rate %u Mbps.",
> --
> 2.43.0

With best regards,
Slava

Reply via email to