Add a device context structure which contains the ctx and pd of the device. In addition, provide prepare and release functions for this structure.
Signed-off-by: Michael Baum <michae...@nvidia.com> --- drivers/common/mlx5/linux/mlx5_common_os.c | 144 ++++++++++++- drivers/common/mlx5/mlx5_common.c | 166 +++++++++++++++ drivers/common/mlx5/mlx5_common.h | 48 +++++ drivers/common/mlx5/version.map | 3 + drivers/common/mlx5/windows/mlx5_common_os.c | 207 ++++++++++++++++++- 5 files changed, 562 insertions(+), 6 deletions(-) diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c b/drivers/common/mlx5/linux/mlx5_common_os.c index 9e0c823c97..6f78897390 100644 --- a/drivers/common/mlx5/linux/mlx5_common_os.c +++ b/drivers/common/mlx5/linux/mlx5_common_os.c @@ -23,6 +23,22 @@ const struct mlx5_glue *mlx5_glue; #endif +/* Environment variable to control the doorbell register mapping. */ +#define MLX5_SHUT_UP_BF "MLX5_SHUT_UP_BF" +#if defined(RTE_ARCH_ARM64) +#define MLX5_SHUT_UP_BF_DEFAULT "0" +#else +#define MLX5_SHUT_UP_BF_DEFAULT "1" +#endif + +/* Default PMD specific parameter value. */ +#define MLX5_TXDB_UNSET (-1) + +/* MLX5_TX_DB_NC supported values. */ +#define MLX5_TXDB_CACHED 0 +#define MLX5_TXDB_NCACHED 1 +#define MLX5_TXDB_HEURISTIC 2 + int mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr) { @@ -401,6 +417,127 @@ mlx5_glue_constructor(void) mlx5_glue = NULL; } +static int +mlx5_config_doorbell_mapping_env(int dbnc) +{ + char *env; + int value; + + MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); + /* Get environment variable to store. */ + env = getenv(MLX5_SHUT_UP_BF); + value = env ? !!strcmp(env, "0") : MLX5_TXDB_UNSET; + if (dbnc == MLX5_TXDB_UNSET) + setenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1); + else + setenv(MLX5_SHUT_UP_BF, + dbnc == MLX5_TXDB_NCACHED ? "1" : "0", 1); + return value; +} + +static void +mlx5_restore_doorbell_mapping_env(int value) +{ + MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); + /* Restore the original environment variable state. 
*/ + if (value == MLX5_TXDB_UNSET) + unsetenv(MLX5_SHUT_UP_BF); + else + setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1); +} + +/** + * Function API to open IB device using DevX. + * + * This function calls the Linux glue APIs to open a device. + * + * @param dev_ctx + * Pointer to the context device data structure. + * @param dev + * Pointer to the generic device. + * @param dbnc + * Device argument help configure the environment variable. + * @param classes + * Chosen classes come from device arguments. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_os_devx_open_device(struct mlx5_dev_ctx *dev_ctx, struct rte_device *dev, + int dbnc, uint32_t classes) +{ + struct ibv_device *ibv; + struct ibv_context *ctx = NULL; + int dbmap_env; + + ibv = mlx5_os_get_ibv_dev(dev); + if (!ibv) + return -rte_errno; + DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name); + /* + * Configure environment variable "MLX5_BF_SHUT_UP" before the device + * creation. The rdma_core library checks the variable at device + * creation and stores the result internally. + */ + dbmap_env = mlx5_config_doorbell_mapping_env(dbnc); + /* Try to open IB device with DV. */ + errno = 0; + ctx = mlx5_glue->dv_open_device(ibv); + /* + * The environment variable is not needed anymore, all device creation + * attempts are completed. + */ + mlx5_restore_doorbell_mapping_env(dbmap_env); + if (ctx == NULL && classes != MLX5_CLASS_ETH) { + DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name); + rte_errno = errno ? errno : ENODEV; + return -rte_errno; + } + dev_ctx->ctx = ctx; + return 0; +} + +/** + * Allocate Protection Domain object and extract its pdn using DV API. + * + * @param[out] dev_ctx + * Pointer to the context device data structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */ +int +mlx5_os_pd_create(struct mlx5_dev_ctx *dev_ctx) +{ +#ifdef HAVE_IBV_FLOW_DV_SUPPORT + struct mlx5dv_obj obj; + struct mlx5dv_pd pd_info; + int ret; + + dev_ctx->pd = mlx5_glue->alloc_pd(dev_ctx->ctx); + if (dev_ctx->pd == NULL) { + DRV_LOG(ERR, "Failed to allocate PD."); + return errno ? -errno : -ENOMEM; + } + obj.pd.in = dev_ctx->pd; + obj.pd.out = &pd_info; + ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD); + if (ret != 0) { + DRV_LOG(ERR, "Fail to get PD object info."); + mlx5_glue->dealloc_pd(dev_ctx->pd); + dev_ctx->pd = NULL; + return -errno; + } + dev_ctx->pdn = pd_info.pdn; + return 0; +#else + (void)dev_ctx; + DRV_LOG(ERR, "Cannot get pdn - no DV support."); + return -ENOTSUP; +#endif /* HAVE_IBV_FLOW_DV_SUPPORT */ +} + struct ibv_device * mlx5_os_get_ibv_device(const struct rte_pci_addr *addr) { @@ -423,8 +560,13 @@ mlx5_os_get_ibv_device(const struct rte_pci_addr *addr) ibv_match = ibv_list[n]; break; } - if (ibv_match == NULL) + if (ibv_match == NULL) { + DRV_LOG(WARNING, + "No Verbs device matches PCI device " PCI_PRI_FMT "," + " are kernel drivers loaded?", + addr->domain, addr->bus, addr->devid, addr->function); rte_errno = ENOENT; + } mlx5_glue->free_device_list(ibv_list); return ibv_match; } diff --git a/drivers/common/mlx5/mlx5_common.c b/drivers/common/mlx5/mlx5_common.c index 459cf4bcc4..be3d0f2627 100644 --- a/drivers/common/mlx5/mlx5_common.c +++ b/drivers/common/mlx5/mlx5_common.c @@ -41,6 +41,20 @@ static inline void mlx5_cpu_id(unsigned int level, } #endif +/* + * Device parameter to force doorbell register mapping to non-cahed region + * eliminating the extra write memory barrier. + */ +#define MLX5_TX_DB_NC "tx_db_nc" + +/* Default PMD specific parameter value. */ +#define MLX5_TXDB_UNSET (-1) + +/* MLX5_TX_DB_NC supported values. */ +#define MLX5_TXDB_CACHED 0 +#define MLX5_TXDB_NCACHED 1 +#define MLX5_TXDB_HEURISTIC 2 + RTE_LOG_REGISTER_DEFAULT(mlx5_common_logtype, NOTICE) /* Head of list of drivers. 
*/ @@ -88,6 +102,83 @@ driver_get(uint32_t class) return NULL; } +/** + * Verify and store value for device argument. + * + * @param[in] key + * Key argument to verify. + * @param[in] val + * Value associated with key. + * @param opaque + * User data. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +mlx5_common_args_check(const char *key, const char *val, void *opaque) +{ + int *dbnc = opaque; + signed long tmp; + + errno = 0; + tmp = strtol(val, NULL, 0); + if (errno) { + rte_errno = errno; + DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val); + return -rte_errno; + } + if (strcmp(MLX5_TX_DB_NC, key) == 0) { + if (tmp != MLX5_TXDB_CACHED && + tmp != MLX5_TXDB_NCACHED && + tmp != MLX5_TXDB_HEURISTIC) { + DRV_LOG(ERR, "Invalid Tx doorbell mapping parameter."); + rte_errno = EINVAL; + return -rte_errno; + } + *dbnc = tmp; + } + return 0; +} + +/** + * Parse Tx doorbell mapping parameter. + * + * @param devargs + * Device arguments structure. + * @param dbnc + * Pointer to get into doorbell mapping parameter. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */ +static int +mlx5_parse_db_map_arg(struct rte_devargs *devargs, int *dbnc) +{ + struct rte_kvargs *kvlist; + int ret = 0; + + if (devargs == NULL) + return 0; + kvlist = rte_kvargs_parse(devargs->args, NULL); + if (kvlist == NULL) { + rte_errno = EINVAL; + return -rte_errno; + } + if (rte_kvargs_count(kvlist, MLX5_TX_DB_NC)) { + ret = rte_kvargs_process(kvlist, MLX5_TX_DB_NC, + mlx5_common_args_check, dbnc); + if (ret) { + rte_errno = EINVAL; + rte_kvargs_free(kvlist); + return -rte_errno; + } + } + rte_kvargs_free(kvlist); + return 0; +} + + static int devargs_class_handler(__rte_unused const char *key, const char *class_names, void *opaque) @@ -219,6 +310,81 @@ mlx5_dev_to_pci_str(const struct rte_device *dev, char *addr, size_t size) #endif } +/** + * Uninitialize context device and release all its resources. + * + * @param dev_ctx + * Pointer to the context device data structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +void +mlx5_dev_ctx_release(struct mlx5_dev_ctx *dev_ctx) +{ + if (dev_ctx->pd != NULL) { + claim_zero(mlx5_os_dealloc_pd(dev_ctx->pd)); + dev_ctx->pd = NULL; + } + if (dev_ctx->ctx != NULL) { + claim_zero(mlx5_glue->close_device(dev_ctx->ctx)); + dev_ctx->ctx = NULL; + } +} + +/** + * Initialize context device and allocate all its resources. + * + * @param dev_ctx + * Pointer to the context device data structure. + * @param dev + * Pointer to mlx5 device structure. + * @param classes_loaded + * Chosen classes come from device arguments. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_dev_ctx_prepare(struct mlx5_dev_ctx *dev_ctx, struct rte_device *dev, + uint32_t classes_loaded) +{ + int dbnc = MLX5_TXDB_UNSET; + int ret; + + dev_ctx->numa_node = dev->numa_node; + /* + * Parse Tx doorbell mapping parameter. It helps to configure + * environment variable "MLX5_BF_SHUT_UP" before the device creation. 
+ */ + ret = mlx5_parse_db_map_arg(dev->devargs, &dbnc); + if (ret < 0) + return ret; + /* + * Open device using DevX. + * If DevX isn't supported, ctx field remains NULL. + */ + ret = mlx5_os_devx_open_device(dev_ctx, dev, dbnc, classes_loaded); + if (ret < 0) + return ret; + /* + * When DevX is not supported and the classes selected by the user can + * also work with Verbs, the mlx5_os_devx_open_device function returns + * 0 although no device has been created at this time. + * Later they will try to create again in Verbs. + */ + if (dev_ctx->ctx == NULL) + return 0; + /* Allocate Protection Domain object and extract its pdn. */ + ret = mlx5_os_pd_create(dev_ctx); + if (ret) + goto error; + return ret; +error: + mlx5_dev_ctx_release(dev_ctx); + return ret; +} + static void dev_release(struct mlx5_common_device *dev) { diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h index a772371200..609953b70e 100644 --- a/drivers/common/mlx5/mlx5_common.h +++ b/drivers/common/mlx5/mlx5_common.h @@ -324,6 +324,46 @@ void mlx5_common_init(void); * from devargs, locating target RDMA device and probing with it. */ +/** + * Shared device context structure. + * Contains HW device objects which belong to same device with multiple drivers. + */ +struct mlx5_dev_ctx { + void *ctx; /* Verbs/DV/DevX context. */ + void *pd; /* Protection Domain. */ + uint32_t pdn; /* Protection Domain Number. */ + int numa_node; /* Numa node of device. */ +}; + +/** + * Uninitialize context device and release all its resources. + * + * @param dev_ctx + * Pointer to the context device data structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +__rte_internal +void mlx5_dev_ctx_release(struct mlx5_dev_ctx *dev_ctx); + +/** + * Initialize context device and allocate all its resources. + * + * @param dev_ctx + * Pointer to the context device data structure. + * @param dev + * Pointer to mlx5 device structure. 
+ * @param classes_loaded + * Chosen classes come from device arguments. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +__rte_internal +int mlx5_dev_ctx_prepare(struct mlx5_dev_ctx *dev_ctx, struct rte_device *dev, + uint32_t classes_loaded); + /** * Initialization function for the driver called during device probing. */ @@ -419,4 +459,12 @@ __rte_internal bool mlx5_dev_is_pci(const struct rte_device *dev); +/* mlx5_common_os.c */ + +int mlx5_os_devx_open_device(struct mlx5_dev_ctx *dev_ctx, + struct rte_device *dev, int dbnc, + uint32_t classes); +int mlx5_os_pd_create(struct mlx5_dev_ctx *dev_ctx); + + #endif /* RTE_PMD_MLX5_COMMON_H_ */ diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map index e5cb6b7060..6a88105d02 100644 --- a/drivers/common/mlx5/version.map +++ b/drivers/common/mlx5/version.map @@ -9,6 +9,9 @@ INTERNAL { mlx5_common_init; + mlx5_dev_ctx_release; + mlx5_dev_ctx_prepare; + mlx5_common_verbs_reg_mr; # WINDOWS_NO_EXPORT mlx5_common_verbs_dereg_mr; # WINDOWS_NO_EXPORT diff --git a/drivers/common/mlx5/windows/mlx5_common_os.c b/drivers/common/mlx5/windows/mlx5_common_os.c index 5031bdca26..5d178b0452 100644 --- a/drivers/common/mlx5/windows/mlx5_common_os.c +++ b/drivers/common/mlx5/windows/mlx5_common_os.c @@ -7,6 +7,7 @@ #include <stdio.h> #include <rte_mempool.h> +#include <rte_bus_pci.h> #include <rte_malloc.h> #include <rte_errno.h> @@ -17,7 +18,7 @@ #include "mlx5_malloc.h" /** - * Initialization routine for run-time dependency on external lib + * Initialization routine for run-time dependency on external lib. */ void mlx5_glue_constructor(void) @@ -25,7 +26,7 @@ mlx5_glue_constructor(void) } /** - * Allocate PD. Given a devx context object + * Allocate PD. Given a DevX context object * return an mlx5-pd object. 
* * @param[in] ctx @@ -37,8 +38,8 @@ mlx5_glue_constructor(void) void * mlx5_os_alloc_pd(void *ctx) { - struct mlx5_pd *ppd = mlx5_malloc(MLX5_MEM_ZERO, - sizeof(struct mlx5_pd), 0, SOCKET_ID_ANY); + struct mlx5_pd *ppd = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct mlx5_pd), + 0, SOCKET_ID_ANY); if (!ppd) return NULL; @@ -60,7 +61,7 @@ mlx5_os_alloc_pd(void *ctx) * Pointer to mlx5_pd. * * @return - * Zero if pd is released successfully, negative number otherwise. + * Zero if pd is released successfully, negative number otherwise. */ int mlx5_os_dealloc_pd(void *pd) @@ -72,6 +73,202 @@ mlx5_os_dealloc_pd(void *pd) return 0; } +/** + * Detect if a devx_device_bdf object has identical DBDF values to the + * rte_pci_addr found in bus/pci probing + * + * @param[in] devx_bdf + * Pointer to the devx_device_bdf structure. + * @param[in] addr + * Pointer to the rte_pci_addr structure. + * + * @return + * 1 on Device match, 0 on mismatch. + */ +static int +mlx5_match_devx_bdf_to_addr(struct devx_device_bdf *devx_bdf, + struct rte_pci_addr *addr) +{ + if (addr->domain != (devx_bdf->bus_id >> 8) || + addr->bus != (devx_bdf->bus_id & 0xff) || + addr->devid != devx_bdf->dev_id || + addr->function != devx_bdf->fnc_id) { + return 0; + } + return 1; +} + +/** + * Detect if a devx_device_bdf object matches the rte_pci_addr + * found in bus/pci probing + * Compare both the Native/PF BDF and the raw_bdf representing a VF BDF. + * + * @param[in] devx_bdf + * Pointer to the devx_device_bdf structure. + * @param[in] addr + * Pointer to the rte_pci_addr structure. + * + * @return + * 1 on Device match, 0 on mismatch, rte_errno code on failure. + */ +static int +mlx5_match_devx_devices_to_addr(struct devx_device_bdf *devx_bdf, + struct rte_pci_addr *addr) +{ + int err; + struct devx_device mlx5_dev; + + if (mlx5_match_devx_bdf_to_addr(devx_bdf, addr)) + return 1; + /* + * Didn't match on Native/PF BDF, could still match a VF BDF, + * check it next. 
+ */ + err = mlx5_glue->query_device(devx_bdf, &mlx5_dev); + if (err) { + DRV_LOG(ERR, "query_device failed"); + rte_errno = err; + return rte_errno; + } + if (mlx5_match_devx_bdf_to_addr(&mlx5_dev.raw_bdf, addr)) + return 1; + return 0; +} + +/** + * Look for DevX device that match to given rte_device. + * + * @param dev + * Pointer to the generic device. + * + * @return + * A device match on success, NULL otherwise and rte_errno is set. + */ +static struct devx_device_bdf * +mlx5_os_get_devx_device(struct rte_device *dev) +{ + int n; + struct devx_device_bdf *devx_list; + struct devx_device_bdf *orig_devx_list; + struct devx_device_bdf *devx_match = NULL; + struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev); + struct rte_pci_addr *addr = &pci_dev->addr; + + errno = 0; + devx_list = mlx5_glue->get_device_list(&n); + if (devx_list == NULL) { + rte_errno = errno ? errno : ENOSYS; + DRV_LOG(ERR, "Cannot list devices, is DevX enabled?"); + return NULL; + } + orig_devx_list = devx_list; + while (n-- > 0) { + int ret = mlx5_match_devx_devices_to_addr(devx_list, addr); + if (!ret) { + devx_list++; + continue; + } + if (ret != 1) { + rte_errno = ret; + goto exit; + } + devx_match = devx_list; + break; + } + if (devx_match == NULL) { + /* No device matches, just complain and bail out. */ + DRV_LOG(WARNING, + "No DevX device matches PCI device " PCI_PRI_FMT "," + " is DevX Configured?", + addr->domain, addr->bus, addr->devid, addr->function); + rte_errno = ENOENT; + } +exit: + mlx5_glue->free_device_list(orig_devx_list); + return devx_match; +} + +/** + * Function API open device under Windows. + * + * This function calls the Windows glue APIs to open a device. + * + * @param[out] dev_ctx + * Pointer to the context device data structure. + * @param dev + * Pointer to the generic device. + * @param dbnc + * Device argument help configure the environment variable. + * @param classes + * Chosen classes come from device arguments. 
+ * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_os_devx_open_device(struct mlx5_dev_ctx *dev_ctx, struct rte_device *dev, + int dbnc, uint32_t classes) +{ + RTE_SET_USED(dbnc); + struct devx_device_bdf *devx_bdf_dev = NULL; + struct mlx5_context *mlx5_ctx; + + if (classes != MLX5_CLASS_ETH) { + DRV_LOG(WARNING, + "The chosen classes are not supported on Windows."); + rte_errno = ENOTSUP; + return -rte_errno; + } + devx_bdf_dev = mlx5_os_get_devx_device(dev); + if (devx_bdf_dev == NULL) + return -rte_errno; + /* Try to open DevX device with DV. */ + mlx5_ctx = mlx5_glue->open_device(devx_bdf_dev); + if (mlx5_ctx) { + DRV_LOG(ERR, "Failed to open DevX device."); + rte_errno = errno; + return -rte_errno; + } + if (mlx5_glue->query_device(devx_bdf_dev, &mlx5_ctx->mlx5_dev)) { + DRV_LOG(ERR, "Failed to query device context fields."); + claim_zero(mlx5_glue->close_device(mlx5_ctx)); + rte_errno = errno; + return -rte_errno; + } + dev_ctx->ctx = mlx5_ctx; + return 0; +} + +/** + * Allocate Protection Domain object and extract its pdn using DV API. + * + * @param[out] dev_ctx + * Pointer to the context device data structure. + * + * @return + * 0 on success, a negative value otherwise. + */ +int +mlx5_os_pd_create(struct mlx5_dev_ctx *dev_ctx) +{ + struct mlx5_pd *pd; + + pd = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pd), 0, SOCKET_ID_ANY); + if (!pd) + return -1; + struct mlx5_devx_obj *obj = mlx5_devx_cmd_alloc_pd(dev_ctx->ctx); + if (!obj) { + mlx5_free(pd); + return -1; + } + pd->obj = obj; + pd->pdn = obj->id; + pd->devx_ctx = dev_ctx->ctx; + dev_ctx->pd = pd; + dev_ctx->pdn = pd->pdn; + return 0; +} + /** * Register umem. * -- 2.25.1