Add an option to get the IB device after disabling RoCE. This is relevant when the vDPA class is present in the device arguments list.
Signed-off-by: Michael Baum <michae...@nvidia.com> --- drivers/common/mlx5/linux/mlx5_common_os.c | 126 ++++++++++++++++++++- 1 file changed, 125 insertions(+), 1 deletion(-) diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c b/drivers/common/mlx5/linux/mlx5_common_os.c index 6f78897390..4a94865241 100644 --- a/drivers/common/mlx5/linux/mlx5_common_os.c +++ b/drivers/common/mlx5/linux/mlx5_common_os.c @@ -15,6 +15,7 @@ #include <rte_string_fns.h> #include "mlx5_common.h" +#include "mlx5_nl.h" #include "mlx5_common_log.h" #include "mlx5_common_os.h" #include "mlx5_glue.h" @@ -39,6 +40,9 @@ const struct mlx5_glue *mlx5_glue; #define MLX5_TXDB_NCACHED 1 #define MLX5_TXDB_HEURISTIC 2 +#define MLX5_VDPA_MAX_RETRIES 20 +#define MLX5_VDPA_USEC 1000 + int mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr) { @@ -417,6 +421,123 @@ mlx5_glue_constructor(void) mlx5_glue = NULL; } +/* Try to disable RoCE for PCI address `addr` through devlink over a NETLINK_GENERIC socket. Returns 0 when RoCE is already reported as disabled or the set request succeeds; otherwise returns the value of the failing step (socket init, devlink family id lookup, get request or set request). Once opened, the socket is closed on every path. */ +static int +mlx5_nl_roce_disable(const char *addr) +{ + int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC); + int devlink_id; + int enable; + int ret; + + if (nlsk_fd < 0) + return nlsk_fd; + devlink_id = mlx5_nl_devlink_family_id_get(nlsk_fd); + if (devlink_id < 0) { + ret = devlink_id; + DRV_LOG(DEBUG, + "Failed to get devlink id for ROCE operations by Netlink."); + goto close; + } + ret = mlx5_nl_enable_roce_get(nlsk_fd, devlink_id, addr, &enable); + if (ret) { + DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.", + ret); + goto close; + } else if (!enable) { + DRV_LOG(INFO, "ROCE has already disabled(Netlink)."); + goto close; + } + ret = mlx5_nl_enable_roce_set(nlsk_fd, devlink_id, addr, 0); + if (ret) + DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", ret); + else + DRV_LOG(INFO, "ROCE is disabled by Netlink successfully."); +close: + close(nlsk_fd); + return ret; +} + +/* Try to disable RoCE by sysfs: read the roce_enable attribute of PCI device `addr` and, if it is non-zero, reopen the file for writing and write "0". Returns 0 when the attribute already reads 0 or after the write has been issued; fopen failures return -ENOTSUP with rte_errno set. NOTE(review): the write is never verified, since the fprintf() result and the fclose() return value are ignored, so a write rejected when the stream is flushed is still reported as success. NOTE(review): the parse-failure path returns positive EINVAL while the fopen failures return negative values. NOTE(review): the already-disabled path also logs the "disabled by sysfs successfully" message at the close label. 
*/ +static int +mlx5_sys_roce_disable(const char *addr) +{ + FILE *file_o; + int enable; + int ret; + + MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr); + file_o = fopen(file_p, "rb"); + if (!file_o) { + rte_errno = ENOTSUP; + return -ENOTSUP; + } + ret = fscanf(file_o, "%d", &enable); + if (ret != 1) { + rte_errno = EINVAL; + ret = EINVAL; + goto close; + } else if (!enable) { + ret = 0; + DRV_LOG(INFO, "ROCE has already disabled(sysfs)."); + goto close; + } + fclose(file_o); + file_o = fopen(file_p, "wb"); + if (!file_o) { + rte_errno = ENOTSUP; + return -ENOTSUP; + } + fprintf(file_o, "0\n"); + ret = 0; +close: + if (ret) + DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret); + else + DRV_LOG(INFO, "ROCE is disabled by sysfs successfully."); + fclose(file_o); + return ret; +} + +static int +mlx5_roce_disable(struct rte_device *dev) +{ + char pci_addr[PCI_PRI_STR_SIZE] = { 0 }; + + if (mlx5_dev_to_pci_str(dev, pci_addr, sizeof(pci_addr)) < 0) + return -rte_errno; + /* Try Netlink first; the sysfs fallback runs only when the Netlink attempt returns non-zero, and failure is reported only when both attempts fail. */ + if (mlx5_nl_roce_disable(pci_addr) != 0 && + mlx5_sys_roce_disable(pci_addr) != 0) + return -rte_errno; + return 0; +} + +static struct ibv_device * +mlx5_vdpa_get_ibv_dev(struct rte_device *dev) +{ + struct ibv_device *ibv; + int retry; + + if (mlx5_roce_disable(dev) != 0) { + DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".", + dev->name); + return NULL; + } + /* Wait for the IB device to appear again after reload: poll mlx5_os_get_ibv_dev() up to MLX5_VDPA_MAX_RETRIES times, sleeping MLX5_VDPA_USEC microseconds after each miss. 
*/ + for (retry = MLX5_VDPA_MAX_RETRIES; retry > 0; --retry) { + ibv = mlx5_os_get_ibv_dev(dev); + if (ibv != NULL) + return ibv; + usleep(MLX5_VDPA_USEC); + } + DRV_LOG(ERR, + "Cannot get IB device after disabling RoCE for \"%s\", retries exceed %d.", + dev->name, MLX5_VDPA_MAX_RETRIES); + rte_errno = EAGAIN; + return NULL; +} + static int mlx5_config_doorbell_mapping_env(int dbnc) { @@ -471,7 +592,10 @@ mlx5_os_devx_open_device(struct mlx5_dev_ctx *dev_ctx, struct rte_device *dev, struct ibv_context *ctx = NULL; int dbmap_env; - ibv = mlx5_os_get_ibv_dev(dev); + if (classes & MLX5_CLASS_VDPA) + ibv = mlx5_vdpa_get_ibv_dev(dev); + else + ibv = mlx5_os_get_ibv_dev(dev); if (!ibv) return -rte_errno; DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name); -- 2.25.1