Add an option to get the IB device after disabling RoCE. It is relevant
if the vDPA class is present in the device arguments list.

Signed-off-by: Michael Baum <michae...@nvidia.com>
---
 drivers/common/mlx5/linux/mlx5_common_os.c | 126 ++++++++++++++++++++-
 1 file changed, 125 insertions(+), 1 deletion(-)

diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c b/drivers/common/mlx5/linux/mlx5_common_os.c
index 6f78897390..4a94865241 100644
--- a/drivers/common/mlx5/linux/mlx5_common_os.c
+++ b/drivers/common/mlx5/linux/mlx5_common_os.c
@@ -15,6 +15,7 @@
 #include <rte_string_fns.h>
 
 #include "mlx5_common.h"
+#include "mlx5_nl.h"
 #include "mlx5_common_log.h"
 #include "mlx5_common_os.h"
 #include "mlx5_glue.h"
@@ -39,6 +40,9 @@ const struct mlx5_glue *mlx5_glue;
 #define MLX5_TXDB_NCACHED 1
 #define MLX5_TXDB_HEURISTIC 2
 
+#define MLX5_VDPA_MAX_RETRIES 20 /* Attempts to get the IB device after ROCE disabling. */
+#define MLX5_VDPA_USEC 1000 /* Delay between attempts, passed to usleep() (microseconds). */
+
 int
 mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr)
 {
@@ -417,6 +421,123 @@ mlx5_glue_constructor(void)
        mlx5_glue = NULL;
 }
 
+/**
+ * Try to disable ROCE by Netlink\Devlink.
+ *
+ * @param[in] addr
+ *   Device address string passed to the devlink ROCE get/set requests.
+ *
+ * @return
+ *   0 if ROCE is already disabled or has been disabled successfully,
+ *   a nonzero error value reported by the failing Netlink helper otherwise.
+ */
+static int
+mlx5_nl_roce_disable(const char *addr)
+{
+       int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC);
+       int devlink_id;
+       int enable;
+       int ret;
+
+       if (nlsk_fd < 0)
+               return nlsk_fd;
+       devlink_id = mlx5_nl_devlink_family_id_get(nlsk_fd);
+       if (devlink_id < 0) {
+               ret = devlink_id;
+               DRV_LOG(DEBUG,
+                       "Failed to get devlink id for ROCE operations by Netlink.");
+               goto close;
+       }
+       /* Query the current state first: nothing to set if already off. */
+       ret = mlx5_nl_enable_roce_get(nlsk_fd, devlink_id, addr, &enable);
+       if (ret) {
+               DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.",
+                       ret);
+               goto close;
+       }
+       if (!enable) {
+               DRV_LOG(INFO, "ROCE has already disabled(Netlink).");
+               goto close;
+       }
+       ret = mlx5_nl_enable_roce_set(nlsk_fd, devlink_id, addr, 0);
+       if (ret)
+               DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", ret);
+       else
+               DRV_LOG(INFO, "ROCE is disabled by Netlink successfully.");
+close:
+       /* The Netlink socket is only needed for the duration of this call. */
+       close(nlsk_fd);
+       return ret;
+}
+
+/**
+ * Try to disable ROCE by sysfs.
+ *
+ * Reads the "roce_enable" attribute of the PCI device and, when it is
+ * nonzero, writes "0" to it.
+ *
+ * @param[in] addr
+ *   PCI address string used to build the sysfs attribute path.
+ *
+ * @return
+ *   0 if ROCE is already disabled or has been disabled successfully,
+ *   a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_sys_roce_disable(const char *addr)
+{
+       FILE *file_o;
+       int enable;
+       int err = 0;
+       int ret;
+
+       MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr);
+       file_o = fopen(file_p, "rb");
+       if (!file_o) {
+               rte_errno = ENOTSUP;
+               return -ENOTSUP;
+       }
+       ret = fscanf(file_o, "%d", &enable);
+       fclose(file_o);
+       if (ret != 1) {
+               /* Negative value, consistent with the other error paths. */
+               rte_errno = EINVAL;
+               ret = -EINVAL;
+               goto error;
+       }
+       if (!enable) {
+               DRV_LOG(INFO, "ROCE has already disabled(sysfs).");
+               return 0;
+       }
+       file_o = fopen(file_p, "wb");
+       if (!file_o) {
+               rte_errno = ENOTSUP;
+               return -ENOTSUP;
+       }
+       /*
+        * The stream is buffered, so a rejected write may only be reported
+        * when the buffer is flushed by fclose(): check both calls.
+        */
+       if (fprintf(file_o, "0\n") < 0)
+               err = errno != 0 ? errno : EIO;
+       if (fclose(file_o) != 0 && err == 0)
+               err = errno != 0 ? errno : EIO;
+       if (err != 0) {
+               rte_errno = err;
+               ret = -err;
+               goto error;
+       }
+       DRV_LOG(INFO, "ROCE is disabled by sysfs successfully.");
+       return 0;
+error:
+       DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret);
+       return ret;
+}
+
+/**
+ * Disable ROCE for a device, trying Netlink first and sysfs second.
+ *
+ * @param dev
+ *   Pointer to the generic device, converted to its PCI address string.
+ *
+ * @return
+ *   0 if either method succeeds, -rte_errno otherwise.
+ */
+static int
+mlx5_roce_disable(struct rte_device *dev)
+{
+       char pci_addr[PCI_PRI_STR_SIZE] = { 0 };
+       int ret;
+
+       ret = mlx5_dev_to_pci_str(dev, pci_addr, sizeof(pci_addr));
+       if (ret < 0)
+               return -rte_errno;
+       /* Netlink is the preferred method. */
+       if (mlx5_nl_roce_disable(pci_addr) == 0)
+               return 0;
+       /* Fallback to sysfs only when Netlink did not succeed. */
+       if (mlx5_sys_roce_disable(pci_addr) == 0)
+               return 0;
+       return -rte_errno;
+}
+
+/**
+ * Get the IB device for vDPA use: disable ROCE first, then poll until
+ * the IB device can be found again.
+ *
+ * @param dev
+ *   Pointer to the generic device.
+ *
+ * @return
+ *   The IB device on success; NULL if ROCE could not be disabled, or if
+ *   the device was not found within MLX5_VDPA_MAX_RETRIES attempts
+ *   (rte_errno is set to EAGAIN in that case).
+ */
+static struct ibv_device *
+mlx5_vdpa_get_ibv_dev(struct rte_device *dev)
+{
+       struct ibv_device *ibv;
+       int retry;
+
+       if (mlx5_roce_disable(dev) != 0) {
+               DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
+                       dev->name);
+               return NULL;
+       }
+       /* Wait for the IB device to appear again after reload. */
+       for (retry = MLX5_VDPA_MAX_RETRIES; retry > 0; --retry) {
+               ibv = mlx5_os_get_ibv_dev(dev);
+               if (ibv != NULL)
+                       return ibv;
+               usleep(MLX5_VDPA_USEC);
+       }
+       DRV_LOG(ERR,
+               "Cannot get IB device after disabling RoCE for \"%s\", retries exceed %d.",
+               dev->name, MLX5_VDPA_MAX_RETRIES);
+       rte_errno = EAGAIN;
+       return NULL;
+}
+
 static int
 mlx5_config_doorbell_mapping_env(int dbnc)
 {
@@ -471,7 +592,10 @@ mlx5_os_devx_open_device(struct mlx5_dev_ctx *dev_ctx, struct rte_device *dev,
        struct ibv_context *ctx = NULL;
        int dbmap_env;
 
-       ibv = mlx5_os_get_ibv_dev(dev);
+       if (classes & MLX5_CLASS_VDPA)
+               ibv = mlx5_vdpa_get_ibv_dev(dev);
+       else
+               ibv = mlx5_os_get_ibv_dev(dev);
        if (!ibv)
                return -rte_errno;
        DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name);
-- 
2.25.1

Reply via email to