Multi-Packet RQ can further save PCIe bandwidth by posting a single large buffer for multiple packets.
Instead of posting a buffer per packet, one large buffer is posted to receive multiple packets on the buffer. Add support for multi-packet RQ on Windows. The feature is disabled by default and can be enabled by setting mprq_en=1 in the PMD-specific arguments. Signed-off-by: Tal Shnaiderman <tal...@nvidia.com> --- drivers/common/mlx5/mlx5_devx_cmds.c | 3 +++ drivers/common/mlx5/mlx5_devx_cmds.h | 2 ++ drivers/common/mlx5/windows/mlx5_win_defs.h | 7 +++++++ drivers/net/mlx5/windows/mlx5_os.c | 26 ++++++++++++++++++++++++++ 4 files changed, 38 insertions(+) diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c index d02ac2a678..7900254287 100644 --- a/drivers/common/mlx5/mlx5_devx_cmds.c +++ b/drivers/common/mlx5/mlx5_devx_cmds.c @@ -970,6 +970,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_CONN_TRACK_OFFLOAD); attr->rq_delay_drop = MLX5_GET(cmd_hca_cap, hcattr, rq_delay_drop); + attr->striding_rq = MLX5_GET(cmd_hca_cap, hcattr, striding_rq); + attr->ext_stride_num_range = + MLX5_GET(cmd_hca_cap, hcattr, ext_stride_num_range); if (hca_cap_2_sup) { hcattr = mlx5_devx_get_hca_cap(ctx, in, out, &rc, MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE_2 | diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h index 1bac18c59d..2d813c0fdc 100644 --- a/drivers/common/mlx5/mlx5_devx_cmds.h +++ b/drivers/common/mlx5/mlx5_devx_cmds.h @@ -256,6 +256,8 @@ struct mlx5_hca_attr { uint32_t esw_mgr_vport_id_valid:1; /* E-Switch Mgr vport ID is valid. */ uint16_t esw_mgr_vport_id; /* E-Switch Mgr vport ID . */ uint16_t max_wqe_sz_sq; + uint32_t striding_rq:1; + uint32_t ext_stride_num_range:1; }; /* LAG Context. 
*/ diff --git a/drivers/common/mlx5/windows/mlx5_win_defs.h b/drivers/common/mlx5/windows/mlx5_win_defs.h index 3554e4a7ff..822104a109 100644 --- a/drivers/common/mlx5/windows/mlx5_win_defs.h +++ b/drivers/common/mlx5/windows/mlx5_win_defs.h @@ -258,4 +258,11 @@ enum { MLX5_MATCH_MISC_PARAMETERS = 1 << 1, MLX5_MATCH_INNER_HEADERS = 1 << 2, }; + +#define MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES 9 +#define MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES 16 +#define MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES 6 +#define MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES 13 +#define MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES 3 +#define IB_QPT_RAW_PACKET 8 #endif /* __MLX5_WIN_DEFS_H__ */ diff --git a/drivers/net/mlx5/windows/mlx5_os.c b/drivers/net/mlx5/windows/mlx5_os.c index c7bb81549e..02438ece56 100644 --- a/drivers/net/mlx5/windows/mlx5_os.c +++ b/drivers/net/mlx5/windows/mlx5_os.c @@ -187,6 +187,32 @@ mlx5_os_capabilities_prepare(struct mlx5_dev_ctx_shared *sh) if (sh->dev_cap.tso) sh->dev_cap.tso_max_payload_sz = 1 << hca_attr->max_lso_cap; DRV_LOG(DEBUG, "Counters are not supported."); + if (hca_attr->striding_rq) { + sh->dev_cap.mprq.enabled = 1; + sh->dev_cap.mprq.log_min_stride_size = + MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES; + sh->dev_cap.mprq.log_max_stride_size = + MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES; + if (hca_attr->ext_stride_num_range) + sh->dev_cap.mprq.log_min_stride_num = + MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES; + else + sh->dev_cap.mprq.log_min_stride_num = + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES; + sh->dev_cap.mprq.log_max_stride_num = + MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES; + DRV_LOG(DEBUG, "\tmin_single_stride_log_num_of_bytes: %u", + sh->dev_cap.mprq.log_min_stride_size); + DRV_LOG(DEBUG, "\tmax_single_stride_log_num_of_bytes: %u", + sh->dev_cap.mprq.log_max_stride_size); + DRV_LOG(DEBUG, "\tmin_single_wqe_log_num_of_strides: %u", + sh->dev_cap.mprq.log_min_stride_num); + DRV_LOG(DEBUG, "\tmax_single_wqe_log_num_of_strides: %u", + sh->dev_cap.mprq.log_max_stride_num); + DRV_LOG(DEBUG, 
"\tmin_stride_wqe_log_size: %u", + sh->dev_cap.mprq.log_min_stride_wqe_size); + DRV_LOG(DEBUG, "Device supports Multi-Packet RQ."); + } if (hca_attr->rss_ind_tbl_cap) { /* * DPDK doesn't support larger/variable indirection tables. -- 2.16.1.windows.4