Create a mlx5 bus driver framework for invoking drivers of multiple classes that have registered with the mlx5_pci bus driver.
Validate user class arguments for supported class combinations.

Signed-off-by: Parav Pandit <pa...@mellanox.com>
Acked-by: Matan Azrad <ma...@mellanox.com>
---
Changelog:
v3->v4:
 - Fixed dma_map error unwinding flow to follow same order for unmap
v2->v3:
 - Addressed comments from Asaf
 - Using full names in function names
 - Added new line before function name in multiple functions
 - Added example string to parse for multiple classes
 - Dropped mlx5 prefix from static function
 - Addressed comments from Matan.
 - Renamed mlx5_valid_class_combo to mlx5_class_combinations
 - Added cross check for class drivers to support only 3 flags for now
 - Added full stop at the end of comment block.
 - Removed empty lines
 - Fixed issue to remove multiple classes for a driver
 - Using define for drv_flags at multiple places
 - Maintaining class driver list to keep load/unload order symmetric and
   mirror of each other.
 - Deriving drv_flags based on the class drivers
 - Using PCI address comparison helper instead of pointer comparison
 - Fixed alignment for id_table
 - Continue to probe_err if device is already probed
 - Perform dma map on best effort basis for all supported drivers
 - Removed drv_flags check
 - Dynamically build pci id table
 - Using PCI to mlx5 device helper routines
v1->v2:
 - Address comments from Thomas and Gaetan
 - Enhanced driver to honor RTE_PCI_DRV_PROBE_AGAIN drv_flag
 - Use anonymous structure for class search and code changes around it
 - Define static for class combination array
 - Use RTE_DIM to find array size
 - Added OOM check for strdup()
 - Renamed copy variable to nstr_orig
 - Returning negative error code
 - Returning directly if match entry found
 - Use compat condition check
 - Avoided cutting error message string
 - Use uint32_t datatype instead of enum mlx5_class
 - Changed logic to parse device arguments only once during probe()
 - Added check to fail driver probe if multiple classes register with
   DMA ops
 - Renamed function to parse_class_options
---
drivers/bus/mlx5_pci/Makefile       |   2 +
drivers/bus/mlx5_pci/meson.build    |   2 +-
drivers/bus/mlx5_pci/mlx5_pci_bus.c | 551 ++++++++++++++++++++++++++++
3 files changed, 554 insertions(+), 1 deletion(-)

diff --git a/drivers/bus/mlx5_pci/Makefile b/drivers/bus/mlx5_pci/Makefile
index de4ccd83f..1a005ee32 100644
--- a/drivers/bus/mlx5_pci/Makefile
+++ b/drivers/bus/mlx5_pci/Makefile
@@ -15,7 +15,9 @@ CFLAGS += -I$(RTE_SDK)/drivers/common/mlx5
 CFLAGS += -I$(BUILDDIR)/drivers/common/mlx5
 CFLAGS += -I$(RTE_SDK)/drivers/common/mlx5/linux
 CFLAGS += -I$(RTE_SDK)/drivers/bus/pci
+CFLAGS += -D_DEFAULT_SOURCE
 LDLIBS += -lrte_eal
+LDLIBS += -lrte_kvargs
 LDLIBS += -lrte_common_mlx5
 LDLIBS += -lrte_pci -lrte_bus_pci
 
diff --git a/drivers/bus/mlx5_pci/meson.build b/drivers/bus/mlx5_pci/meson.build
index 64a17cbad..0532a9dfd 100644
--- a/drivers/bus/mlx5_pci/meson.build
+++ b/drivers/bus/mlx5_pci/meson.build
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2020 Mellanox Technologies Ltd
 
-deps += ['pci', 'bus_pci', 'common_mlx5']
+deps += ['pci', 'bus_pci', 'common_mlx5', 'kvargs']
 install_headers('rte_bus_mlx5_pci.h')
 sources = files('mlx5_pci_bus.c')
 
diff --git a/drivers/bus/mlx5_pci/mlx5_pci_bus.c b/drivers/bus/mlx5_pci/mlx5_pci_bus.c
index 66db3c7b0..6f219730c 100644
--- a/drivers/bus/mlx5_pci/mlx5_pci_bus.c
+++ b/drivers/bus/mlx5_pci/mlx5_pci_bus.c
@@ -2,13 +2,564 @@
  * Copyright 2020 Mellanox Technologies, Ltd
  */
 
+#include <stdlib.h>
+#include <rte_malloc.h>
 #include "rte_bus_mlx5_pci.h"
+#include <mlx5_common_utils.h>
 
+/* Per PCI device state: the device and the classes loaded on it. */
+struct mlx5_pci_device {
+	struct rte_pci_device *pci_dev;
+	TAILQ_ENTRY(mlx5_pci_device) next;
+	uint32_t classes_loaded; /* Bitmask of loaded class values. */
+};
+
+/* Head of list of class drivers. */
 static TAILQ_HEAD(mlx5_pci_bus_drv_head, rte_mlx5_pci_driver) drv_list =
 				TAILQ_HEAD_INITIALIZER(drv_list);
 
+/* Head of mlx5 pci devices. */
+static TAILQ_HEAD(mlx5_pci_devices_head, mlx5_pci_device) devices_list =
+				TAILQ_HEAD_INITIALIZER(devices_list);
+
+/* Class names accepted in the class device argument. */
+static const struct {
+	const char *name;
+	unsigned int dev_class;
+} mlx5_classes[] = {
+	{ .name = "vdpa", .dev_class = MLX5_CLASS_VDPA },
+	{ .name = "net", .dev_class = MLX5_CLASS_NET },
+};
+
+/* Class combinations a user is allowed to request for one device. */
+static const unsigned int mlx5_class_combinations[] = {
+	MLX5_CLASS_NET,
+	MLX5_CLASS_VDPA,
+	/* New class combination should be added here.
+	 * For example a new multi class device combination
+	 * can be MLX5_CLASS_FOO | MLX5_CLASS_BAR.
+	 */
+};
+
+/* Translate a class name to its class value, or return -EINVAL. */
+static int
+class_name_to_value(const char *class_name)
+{
+	unsigned int i;
+
+	for (i = 0; i < RTE_DIM(mlx5_classes); i++) {
+		if (strcmp(class_name, mlx5_classes[i].name) == 0)
+			return mlx5_classes[i].dev_class;
+	}
+	return -EINVAL;
+}
+
+/* Find the registered class driver whose dev_class equals class. */
+static struct rte_mlx5_pci_driver *
+class_driver_get(uint32_t class)
+{
+	struct rte_mlx5_pci_driver *driver;
+
+	TAILQ_FOREACH(driver, &drv_list, next) {
+		if (driver->dev_class == class)
+			return driver;
+	}
+	return NULL;
+}
+
+/* kvargs handler: parse a colon separated list of class names into a
+ * bitmask stored through opaque, or a negative errno on failure.
+ */
+static int
+bus_cmdline_options_handler(__rte_unused const char *key,
+			    const char *class_names, void *opaque)
+{
+	int *ret = opaque;
+	char *nstr_org;
+	int class_val;
+	char *found;
+	char *nstr;
+
+	*ret = 0;
+	nstr = strdup(class_names);
+	if (!nstr) {
+		*ret = -ENOMEM;
+		return *ret;
+	}
+	nstr_org = nstr;
+	while (nstr) {
+		/* Extract each individual class name. Multiple
+		 * class key,value is supplied as class=net:vdpa:foo:bar.
+		 */
+		found = strsep(&nstr, ":");
+		if (!found)
+			continue;
+		/* Check if it's a valid class. */
+		class_val = class_name_to_value(found);
+		if (class_val < 0) {
+			*ret = -EINVAL;
+			goto err;
+		}
+		*ret |= class_val;
+	}
+err:
+	free(nstr_org);
+	if (*ret < 0)
+		DRV_LOG(ERR, "Invalid mlx5 class options %s."
+			" Maybe typo in device class argument setting?",
+			class_names);
+	return *ret;
+}
+
+/* Return the bitmask of classes requested in devargs, 0 when none is
+ * given, or a negative errno on failure.
+ */
+static int
+parse_class_options(const struct rte_devargs *devargs)
+{
+	const char *key = MLX5_CLASS_ARG_NAME;
+	struct rte_kvargs *kvlist;
+	int ret = 0;
+
+	if (devargs == NULL)
+		return 0;
+	kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (kvlist == NULL)
+		return 0;
+	if (rte_kvargs_count(kvlist, key))
+		rte_kvargs_process(kvlist, key, bus_cmdline_options_handler,
+				   &ret);
+	rte_kvargs_free(kvlist);
+	return ret;
+}
+
 void
 rte_mlx5_pci_driver_register(struct rte_mlx5_pci_driver *driver)
 {
 	TAILQ_INSERT_TAIL(&drv_list, driver, next);
 }
+
+/* Tell whether the class driver's PCI ID table matches pci_dev. */
+static bool
+mlx5_bus_match(const struct rte_mlx5_pci_driver *drv,
+	       const struct rte_pci_device *pci_dev)
+{
+	const struct rte_pci_id *id_table;
+
+	for (id_table = drv->pci_driver.id_table; id_table->vendor_id != 0;
+	     id_table++) {
+		/* Check if device's ids match the class driver's ids. */
+		if (id_table->vendor_id != pci_dev->id.vendor_id &&
+		    id_table->vendor_id != PCI_ANY_ID)
+			continue;
+		if (id_table->device_id != pci_dev->id.device_id &&
+		    id_table->device_id != PCI_ANY_ID)
+			continue;
+		if (id_table->subsystem_vendor_id !=
+		    pci_dev->id.subsystem_vendor_id &&
+		    id_table->subsystem_vendor_id != PCI_ANY_ID)
+			continue;
+		if (id_table->subsystem_device_id !=
+		    pci_dev->id.subsystem_device_id &&
+		    id_table->subsystem_device_id != PCI_ANY_ID)
+			continue;
+		if (id_table->class_id != pci_dev->id.class_id &&
+		    id_table->class_id != RTE_CLASS_ANY_ID)
+			continue;
+		return true;
+	}
+	return false;
+}
+
+/* Return 0 if user_classes is a supported combination, else -EINVAL. */
+static int
+is_valid_class_combination(uint32_t user_classes)
+{
+	unsigned int i;
+
+	/* Verify if user specified valid supported combination. */
+	for (i = 0; i < RTE_DIM(mlx5_class_combinations); i++) {
+		if (mlx5_class_combinations[i] == user_classes)
+			return 0;
+	}
+	/* Not found any valid class combination. */
+	return -EINVAL;
+}
+
+/* Look up the mlx5 device tracking pci_dev; NULL if there is none. */
+static struct mlx5_pci_device *
+pci_to_mlx5_device(const struct rte_pci_device *pci_dev)
+{
+	struct mlx5_pci_device *dev;
+
+	TAILQ_FOREACH(dev, &devices_list, next) {
+		if (dev->pci_dev == pci_dev)
+			return dev;
+	}
+	return NULL;
+}
+
+static bool
+device_class_enabled(const struct mlx5_pci_device *device, uint32_t class)
+{
+	return (device->classes_loaded & class) ? true : false;
+}
+
+static void
+dev_release(struct mlx5_pci_device *dev)
+{
+	TAILQ_REMOVE(&devices_list, dev, next);
+	rte_free(dev);
+}
+
+/* Remove the class drivers selected by enabled_classes, restricted to
+ * the classes currently loaded on dev. Returns -ENODEV when no class
+ * driver was called, otherwise 0 or a remove() error.
+ */
+static int
+class_drivers_remove(struct mlx5_pci_device *dev, uint32_t enabled_classes)
+{
+	struct rte_mlx5_pci_driver *driver;
+	int local_ret = -ENODEV;
+	unsigned int i = 0;
+	int ret = 0;
+
+	enabled_classes &= dev->classes_loaded;
+	while (enabled_classes) {
+		driver = class_driver_get(RTE_BIT64(i));
+		if (driver) {
+			local_ret = driver->pci_driver.remove(dev->pci_dev);
+			if (!local_ret)
+				dev->classes_loaded &= ~RTE_BIT64(i);
+			else if (ret == 0)
+				ret = local_ret;
+		}
+		enabled_classes &= ~RTE_BIT64(i);
+		i++;
+	}
+	if (local_ret)
+		ret = local_ret;
+	return ret;
+}
+
+/* Probe every registered class driver selected by user_classes that
+ * matches pci_dev. On failure, the classes loaded by this call are
+ * removed again and the negative errno is returned.
+ */
+static int
+class_drivers_probe(struct mlx5_pci_device *dev,
+		    struct rte_pci_driver *pci_drv,
+		    struct rte_pci_device *pci_dev, uint32_t user_classes)
+{
+	struct rte_mlx5_pci_driver *driver;
+	uint32_t enabled_classes = 0;
+	bool already_loaded;
+	int ret;
+
+	TAILQ_FOREACH(driver, &drv_list, next) {
+		if ((driver->dev_class & user_classes) == 0)
+			continue;
+		if (!mlx5_bus_match(driver, pci_dev))
+			continue;
+		already_loaded = dev->classes_loaded & driver->dev_class;
+		if (already_loaded &&
+		    !(driver->pci_driver.drv_flags & RTE_PCI_DRV_PROBE_AGAIN)) {
+			DRV_LOG(ERR, "Device %s is already probed\n",
+				pci_dev->device.name);
+			ret = -EEXIST;
+			goto probe_err;
+		}
+		ret = driver->pci_driver.probe(pci_drv, pci_dev);
+		if (ret < 0) {
+			DRV_LOG(ERR, "Failed to load class driver = %s.\n",
+				driver->pci_driver.driver.name);
+			goto probe_err;
+		}
+		/* Mark the class loaded right away: class_drivers_remove()
+		 * masks its argument with classes_loaded, so the error
+		 * unwinding below would otherwise skip this class.
+		 */
+		dev->classes_loaded |= driver->dev_class;
+		if (!already_loaded)
+			enabled_classes |= driver->dev_class;
+	}
+	return 0;
+probe_err:
+	/* Only unload drivers which were enabled in this probe
+	 * instance.
+	 */
+	class_drivers_remove(dev, enabled_classes);
+	return ret;
+}
+
+/**
+ * DPDK callback to register to probe multiple PCI class devices.
+ *
+ * @param[in] pci_drv
+ *   PCI driver structure.
+ * @param[in] pci_dev
+ *   PCI device information.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise.
+ */
+static int
+mlx5_bus_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+		   struct rte_pci_device *pci_dev)
+{
+	struct mlx5_pci_device *dev;
+	uint32_t user_classes = 0;
+	bool new_device = false;
+	int ret;
+
+	ret = parse_class_options(pci_dev->device.devargs);
+	if (ret < 0)
+		return ret;
+	user_classes = ret;
+	if (user_classes) {
+		/* Validate combination here. */
+		ret = is_valid_class_combination(user_classes);
+		if (ret) {
+			DRV_LOG(ERR, "Unsupported mlx5 classes supplied.");
+			return ret;
+		}
+	} else {
+		/* Default to net class. */
+		user_classes = MLX5_CLASS_NET;
+	}
+	dev = pci_to_mlx5_device(pci_dev);
+	if (!dev) {
+		dev = rte_zmalloc("mlx5_pci_device", sizeof(*dev), 0);
+		if (!dev)
+			return -ENOMEM;
+		dev->pci_dev = pci_dev;
+		TAILQ_INSERT_HEAD(&devices_list, dev, next);
+		new_device = true;
+	}
+	ret = class_drivers_probe(dev, pci_drv, pci_dev, user_classes);
+	if (ret)
+		goto class_err;
+	return 0;
+class_err:
+	if (new_device)
+		dev_release(dev);
+	return ret;
+}
+
+/**
+ * DPDK callback to remove one or more class devices for a PCI device.
+ *
+ * This function removes all class devices belonging to a given PCI device.
+ *
+ * @param[in] pci_dev
+ *   Pointer to the PCI device.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise.
+ */
+static int
+mlx5_bus_pci_remove(struct rte_pci_device *pci_dev)
+{
+	struct mlx5_pci_device *dev;
+	int ret;
+
+	dev = pci_to_mlx5_device(pci_dev);
+	if (!dev)
+		return -ENODEV;
+	/* Matching device found, cleanup and unload class drivers. */
+	ret = class_drivers_remove(dev, dev->classes_loaded);
+	if (!ret)
+		dev_release(dev);
+	return ret;
+}
+
+/* DMA map the region with every loaded class driver providing dma_map.
+ * Returns -EINVAL if there is none; on a failure the class drivers
+ * mapped so far are unmapped again.
+ */
+static int
+mlx5_bus_pci_dma_map(struct rte_pci_device *pci_dev, void *addr,
+		     uint64_t iova, size_t len)
+{
+	struct rte_mlx5_pci_driver *class = NULL;
+	struct rte_mlx5_pci_driver *temp;
+	struct mlx5_pci_device *dev;
+	int ret = -EINVAL;
+
+	dev = pci_to_mlx5_device(pci_dev);
+	if (!dev)
+		return -ENODEV;
+	TAILQ_FOREACH(class, &drv_list, next) {
+		if (device_class_enabled(dev, class->dev_class) &&
+		    class->pci_driver.dma_map) {
+			ret = class->pci_driver.dma_map(pci_dev, addr,
+							iova, len);
+			if (ret)
+				goto map_err;
+		}
+	}
+	return ret;
+map_err:
+	TAILQ_FOREACH(temp, &drv_list, next) {
+		if (temp == class)
+			break;
+		if (device_class_enabled(dev, temp->dev_class) &&
+		    temp->pci_driver.dma_map && temp->pci_driver.dma_unmap)
+			temp->pci_driver.dma_unmap(pci_dev, addr, iova, len);
+	}
+	return ret;
+}
+
+/* DMA unmap the region with every loaded class driver providing
+ * dma_unmap, in reverse registration order. Returns -EINVAL if there
+ * is none.
+ */
+static int
+mlx5_bus_pci_dma_unmap(struct rte_pci_device *pci_dev, void *addr,
+		       uint64_t iova, size_t len)
+{
+	struct rte_mlx5_pci_driver *class;
+	struct mlx5_pci_device *dev;
+	int local_ret = -EINVAL;
+	int ret;
+
+	dev = pci_to_mlx5_device(pci_dev);
+	if (!dev)
+		return -ENODEV;
+	ret = 0;
+	/* There is no unmap error recovery in current implementation. */
+	TAILQ_FOREACH_REVERSE(class, &drv_list, mlx5_pci_bus_drv_head, next) {
+		if (device_class_enabled(dev, class->dev_class) &&
+		    class->pci_driver.dma_unmap) {
+			local_ret = class->pci_driver.dma_unmap(pci_dev, addr,
+								iova, len);
+			if (local_ret && (ret == 0))
+				ret = local_ret;
+		}
+	}
+	if (local_ret)
+		ret = local_ret;
+	return ret;
+}
+
+/* PCI ID table is built dynamically based on registered mlx5
+ * class drivers.
+ */
+static struct rte_pci_id *mlx5_bus_pci_id_map;
+
+/* Count the entries of a vendor_id == 0 terminated ID table. */
+static int
+pci_id_table_size_get(const struct rte_pci_id *id_table)
+{
+	int table_size = 0;
+
+	for (; id_table->vendor_id != 0; id_table++)
+		table_size++;
+	return table_size;
+}
+
+/* Tell whether id is already among the first next_idx table entries. */
+static bool
+pci_id_exists(const struct rte_pci_id *id, int next_idx)
+{
+	int i;
+
+	/* Entries 0 .. next_idx - 1 are in use; scan all of them. */
+	for (i = 0; i < next_idx; i++) {
+		if (id->device_id == mlx5_bus_pci_id_map[i].device_id &&
+		    id->vendor_id == mlx5_bus_pci_id_map[i].vendor_id &&
+		    id->subsystem_vendor_id ==
+		    mlx5_bus_pci_id_map[i].subsystem_vendor_id &&
+		    id->subsystem_device_id ==
+		    mlx5_bus_pci_id_map[i].subsystem_device_id)
+			return true;
+	}
+	return false;
+}
+
+/* Append the IDs of id_table that are not yet in the bus ID table. */
+static void
+pci_id_insert(const struct rte_pci_id *id_table, int *next_idx)
+{
+	for (; id_table->vendor_id != 0; id_table++) {
+		if (!pci_id_exists(id_table, *next_idx)) {
+			/* New entry; add to the table. */
+			mlx5_bus_pci_id_map[*next_idx] = *id_table;
+			(*next_idx)++;
+		}
+	}
+}
+
+/* Build the bus ID table as the union of the ID tables of all
+ * registered class drivers.
+ */
+static int
+pci_ids_table_build(void)
+{
+	struct rte_mlx5_pci_driver *first_driver;
+	struct rte_mlx5_pci_driver *driver;
+	const struct rte_pci_id *id_table;
+	int num_ids = 0;
+	int i = 0;
+
+	TAILQ_FOREACH(driver, &drv_list, next)
+		num_ids += pci_id_table_size_get(driver->pci_driver.id_table);
+	if (!num_ids)
+		return -ENODEV;
+	/* Increase size by one for the termination entry of vendor_id = 0. */
+	num_ids += 1;
+	mlx5_bus_pci_id_map = calloc(num_ids, sizeof(*mlx5_bus_pci_id_map));
+	if (!mlx5_bus_pci_id_map)
+		return -ENOMEM;
+	first_driver = TAILQ_FIRST(&drv_list);
+	/* Copy the first class driver's ID table. */
+	for (id_table = first_driver->pci_driver.id_table;
+	     id_table->vendor_id != 0; id_table++, i++)
+		mlx5_bus_pci_id_map[i] = *id_table;
+	TAILQ_FOREACH(driver, &drv_list, next) {
+		/* We already added first driver; skip it. */
+		if (driver == first_driver)
+			continue;
+		pci_id_insert(driver->pci_driver.id_table, &i);
+	}
+	mlx5_bus_pci_id_map[i].vendor_id = 0;
+	return 0;
+}
+
+static bool mlx5_bus_registered;
+static struct rte_pci_driver mlx5_bus_driver = {
+	.driver = {
+		.name = "mlx5_bus_pci",
+	},
+	.probe = mlx5_bus_pci_probe,
+	.remove = mlx5_bus_pci_remove,
+	.dma_map = mlx5_bus_pci_dma_map,
+	.dma_unmap = mlx5_bus_pci_dma_unmap,
+};
+
+/* Init hook: register the bus driver with the PCI IDs and drv_flags
+ * collected from the class drivers present in drv_list.
+ */
+RTE_INIT(mlx5_bus_pci)
+{
+	struct rte_mlx5_pci_driver *class;
+	int ret;
+
+	ret = pci_ids_table_build();
+	if (ret)
+		return;
+	TAILQ_FOREACH(class, &drv_list, next)
+		mlx5_bus_driver.drv_flags |= class->pci_driver.drv_flags;
+	mlx5_bus_driver.id_table = mlx5_bus_pci_id_map;
+	rte_pci_register(&mlx5_bus_driver);
+	mlx5_bus_registered = true;
+}
+
+RTE_FINI(mlx5_bus_pci_finish)
+{
+	if (mlx5_bus_registered)
+		rte_pci_unregister(&mlx5_bus_driver);
+	free(mlx5_bus_pci_id_map);
+}
+RTE_PMD_EXPORT_NAME(mlx5_bus_pci, __COUNTER__);
+RTE_PMD_REGISTER_PCI_TABLE(mlx5_bus, mlx5_bus_pci_id_map);
-- 
2.26.2