rte_eth_dev_map_aggr_tx_affinity() was introduced in the ethdev
library to set the affinity value per Tx queue.

This patch adds MLX5 PMD support for two device ops:
 - map_aggr_tx_affinity
 - count_aggr_ports

After a Tx queue is mapped to an aggregated port by calling
map_aggr_tx_affinity() and traffic is started, the MLX5 PMD updates
the TIS creation with the tx_aggr_affinity value of that Tx queue.
TIS index 1 goes to the first physical port, TIS index 2 goes to the
second physical port, and so on; TIS index 0 is reserved for the
default HW hash mode.
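Not part of the diff below — for reference only, a minimal
application-side sketch of reaching these ops through the public
ethdev API (the helper name, port id, queue id, and affinity value
are hypothetical; error handling is trimmed):

    #include <rte_ethdev.h>

    /*
     * Hypothetical helper: pin Tx queue `txq` of `port_id` to the
     * first aggregated physical port (affinity 1). Affinity 0 would
     * keep the default HW hash mode instead.
     */
    static int
    pin_txq_to_first_port(uint16_t port_id, uint16_t txq)
    {
        int n_aggr = rte_eth_dev_count_aggr_ports(port_id);

        if (n_aggr <= 0)
            return 0; /* no aggregated ports, nothing to map */
        return rte_eth_dev_map_aggr_tx_affinity(port_id, txq, 1);
    }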
Signed-off-by: Jiawei Wang <jiaw...@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viachesl...@nvidia.com>
---
 drivers/common/mlx5/mlx5_prm.h |  8 ------
 drivers/net/mlx5/mlx5.c        | 49 +++++++++++++++++-----------------
 drivers/net/mlx5/mlx5_devx.c   | 24 +++++++++--------
 drivers/net/mlx5/mlx5_tx.h     |  4 +++
 drivers/net/mlx5/mlx5_txq.c    | 38 ++++++++++++++++++++++++++
 5 files changed, 80 insertions(+), 43 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 26a1f0717d..2f5aeecaa9 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -2363,14 +2363,6 @@ struct mlx5_ifc_query_nic_vport_context_in_bits {
 	u8 reserved_at_68[0x18];
 };
 
-/*
- * lag_tx_port_affinity: 0 auto-selection, 1 PF1, 2 PF2 vice versa.
- * Each TIS binds to one PF by setting lag_tx_port_affinity (>0).
- * Once LAG enabled, we create multiple TISs and bind each one to
- * different PFs, then TIS[i] gets affinity i+1 and goes to PF i+1.
- */
-#define MLX5_IFC_LAG_MAP_TIS_AFFINITY(index, num) ((num) ? \
-				     (index) % (num) + 1 : 0)
 struct mlx5_ifc_tisc_bits {
 	u8 strict_lag_tx_port_affinity[0x1];
 	u8 reserved_at_1[0x3];
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index f55c1caca0..8c8f71d508 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1257,9 +1257,9 @@ mlx5_dev_ctx_shared_mempool_subscribe(struct rte_eth_dev *dev)
 static int
 mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh)
 {
-	int i;
 	struct mlx5_devx_lag_context lag_ctx = { 0 };
 	struct mlx5_devx_tis_attr tis_attr = { 0 };
+	int i;
 
 	tis_attr.transport_domain = sh->td->id;
 	if (sh->bond.n_port) {
@@ -1273,35 +1273,30 @@ mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh)
 			DRV_LOG(ERR, "Failed to query lag affinity.");
 			return -1;
 		}
-		if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) {
-			for (i = 0; i < sh->bond.n_port; i++) {
-				tis_attr.lag_tx_port_affinity =
-					MLX5_IFC_LAG_MAP_TIS_AFFINITY(i,
-						sh->bond.n_port);
-				sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx,
-						&tis_attr);
-				if (!sh->tis[i]) {
-					DRV_LOG(ERR, "Failed to TIS %d/%d for bonding device"
-						" %s.", i, sh->bond.n_port,
-						sh->ibdev_name);
-					return -1;
-				}
-			}
+		if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS)
 			DRV_LOG(DEBUG, "LAG number of ports : %d, affinity_1 & 2 : pf%d & %d.\n",
 				sh->bond.n_port, lag_ctx.tx_remap_affinity_1,
 				lag_ctx.tx_remap_affinity_2);
-			return 0;
-		}
-		if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH)
+		else if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH)
 			DRV_LOG(INFO, "Device %s enabled HW hash based LAG.",
 				sh->ibdev_name);
 	}
-	tis_attr.lag_tx_port_affinity = 0;
-	sh->tis[0] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr);
-	if (!sh->tis[0]) {
-		DRV_LOG(ERR, "Failed to TIS 0 for bonding device"
-			" %s.", sh->ibdev_name);
-		return -1;
+	for (i = 0; i <= sh->bond.n_port; i++) {
+		/*
+		 * lag_tx_port_affinity: 0 auto-selection, 1 PF1, 2 PF2 vice versa.
+		 * Each TIS binds to one PF by setting lag_tx_port_affinity (> 0).
+		 * Once LAG enabled, we create multiple TISs and bind each one to
+		 * different PFs, then TIS[i+1] gets affinity i+1 and goes to PF i+1.
+		 * TIS[0] is reserved for HW Hash mode.
+		 */
+		tis_attr.lag_tx_port_affinity = i;
+		sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr);
+		if (!sh->tis[i]) {
+			DRV_LOG(ERR, "Failed to create TIS %d/%d for [bonding] device"
+				" %s.", i, sh->bond.n_port,
+				sh->ibdev_name);
+			return -1;
+		}
 	}
 	return 0;
 }
@@ -2335,6 +2330,8 @@ const struct eth_dev_ops mlx5_dev_ops = {
 	.hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind,
 	.hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind,
 	.get_monitor_addr = mlx5_get_monitor_addr,
+	.count_aggr_ports = mlx5_count_aggr_ports,
+	.map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
 };
 
 /* Available operations from secondary process. */
@@ -2358,6 +2355,8 @@ const struct eth_dev_ops mlx5_dev_sec_ops = {
 	.tx_burst_mode_get = mlx5_tx_burst_mode_get,
 	.get_module_info = mlx5_get_module_info,
 	.get_module_eeprom = mlx5_get_module_eeprom,
+	.count_aggr_ports = mlx5_count_aggr_ports,
+	.map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
 };
 
 /* Available operations in flow isolated mode. */
@@ -2422,6 +2421,8 @@ const struct eth_dev_ops mlx5_dev_ops_isolate = {
 	.hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind,
 	.hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind,
 	.get_monitor_addr = mlx5_get_monitor_addr,
+	.count_aggr_ports = mlx5_count_aggr_ports,
+	.map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
 };
 
 /**
diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index f6e1943fd7..d02cedb202 100644
--- a/drivers/net/mlx5/mlx5_devx.c
+++ b/drivers/net/mlx5/mlx5_devx.c
@@ -1190,17 +1190,19 @@ static uint32_t
 mlx5_get_txq_tis_num(struct rte_eth_dev *dev, uint16_t queue_idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	int tis_idx;
-
-	if (priv->sh->bond.n_port && priv->sh->lag.affinity_mode ==
-			MLX5_LAG_MODE_TIS) {
-		tis_idx = (priv->lag_affinity_idx + queue_idx) %
-			priv->sh->bond.n_port;
-		DRV_LOG(INFO, "port %d txq %d gets affinity %d and maps to PF %d.",
-			dev->data->port_id, queue_idx, tis_idx + 1,
-			priv->sh->lag.tx_remap_affinity[tis_idx]);
-	} else {
-		tis_idx = 0;
+	struct mlx5_txq_data *txq_data = (*priv->txqs)[queue_idx];
+	int tis_idx = 0;
+
+	if (priv->sh->bond.n_port) {
+		if (txq_data->tx_aggr_affinity) {
+			tis_idx = txq_data->tx_aggr_affinity;
+		} else if (priv->sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) {
+			tis_idx = (priv->lag_affinity_idx + queue_idx) %
+				  priv->sh->bond.n_port + 1;
+			DRV_LOG(INFO, "port %d txq %d gets affinity %d and maps to PF %d.",
+				dev->data->port_id, queue_idx, tis_idx,
+				priv->sh->lag.tx_remap_affinity[tis_idx - 1]);
+		}
 	}
 	MLX5_ASSERT(priv->sh->tis[tis_idx]);
 	return priv->sh->tis[tis_idx]->id;
diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h
index a056be7ca8..d0c6303a2d 100644
--- a/drivers/net/mlx5/mlx5_tx.h
+++ b/drivers/net/mlx5/mlx5_tx.h
@@ -144,6 +144,7 @@ struct mlx5_txq_data {
 	uint16_t inlen_send; /* Ordinary send data inline size. */
 	uint16_t inlen_empw; /* eMPW max packet size to inline. */
 	uint16_t inlen_mode; /* Minimal data length to inline. */
+	uint8_t tx_aggr_affinity; /* TxQ affinity configuration. */
 	uint32_t qp_num_8s; /* QP number shifted by 8. */
 	uint64_t offloads; /* Offloads for Tx Queue. */
 	struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
@@ -218,6 +219,9 @@ void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl);
 void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl);
 uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev);
 void mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev);
+int mlx5_count_aggr_ports(struct rte_eth_dev *dev);
+int mlx5_map_aggr_tx_affinity(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+			      uint8_t affinity);
 
 /* mlx5_tx.c */
 
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 419e913559..1e0e61a620 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -1365,3 +1365,41 @@ mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev)
 			ts_mask : 0;
 	}
 }
+
+int mlx5_count_aggr_ports(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+
+	return priv->sh->bond.n_port;
+}
+
+int mlx5_map_aggr_tx_affinity(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+			      uint8_t affinity)
+{
+	struct mlx5_txq_ctrl *txq_ctrl;
+	struct mlx5_txq_data *txq;
+	struct mlx5_priv *priv;
+
+	priv = dev->data->dev_private;
+	txq = (*priv->txqs)[tx_queue_id];
+	if (!txq)
+		return -1;
+	txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
+	if (tx_queue_id >= priv->txqs_n) {
+		DRV_LOG(ERR, "port %u Tx queue index out of range (%u >= %u)",
+			dev->data->port_id, tx_queue_id, priv->txqs_n);
+		rte_errno = EOVERFLOW;
+		return -rte_errno;
+	}
+	if (affinity > priv->num_lag_ports) {
+		DRV_LOG(ERR, "port %u unable to setup Tx queue index %u"
+			" affinity is %u exceeds the maximum %u", dev->data->port_id,
+			tx_queue_id, affinity, priv->num_lag_ports);
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	DRV_LOG(DEBUG, "port %u configuring queue %u for aggregated affinity %u",
+		dev->data->port_id, tx_queue_id, affinity);
+	txq_ctrl->txq.tx_aggr_affinity = affinity;
+	return 0;
+}
-- 
2.18.1