CDX bus supports multiple type of devices, which can be exposed to user-space via vfio-cdx.
vfio-cdx provides the MMIO IO_MEMORY regions as well as the DMA interface for the device (IOMMU). This support aims to enable the DPDK to support the cdx devices in user-space using VFIO interface. Signed-off-by: Nipun Gupta <nipun.gu...@amd.com> --- MAINTAINERS | 5 + drivers/bus/cdx/bus_cdx_driver.h | 201 ++++++++++++ drivers/bus/cdx/cdx.c | 521 +++++++++++++++++++++++++++++++ drivers/bus/cdx/cdx_logs.h | 37 +++ drivers/bus/cdx/cdx_vfio.c | 437 ++++++++++++++++++++++++++ drivers/bus/cdx/meson.build | 13 + drivers/bus/cdx/private.h | 49 +++ drivers/bus/cdx/version.map | 11 + 8 files changed, 1274 insertions(+) create mode 100644 drivers/bus/cdx/bus_cdx_driver.h create mode 100644 drivers/bus/cdx/cdx.c create mode 100644 drivers/bus/cdx/cdx_logs.h create mode 100644 drivers/bus/cdx/cdx_vfio.c create mode 100644 drivers/bus/cdx/meson.build create mode 100644 drivers/bus/cdx/private.h create mode 100644 drivers/bus/cdx/version.map diff --git a/MAINTAINERS b/MAINTAINERS index 8df23e5099..1f9b6af9b9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -569,6 +569,11 @@ M: Parav Pandit <pa...@nvidia.com> M: Xueming Li <xuemi...@nvidia.com> F: drivers/bus/auxiliary/ +CDX bus driver +M: Nipun Gupta <nipun.gu...@amd.com> +M: Nikhil Agarwal <nikhil.agar...@amd.com> +F: drivers/bus/cdx/ + Intel FPGA bus M: Rosen Xu <rosen...@intel.com> F: drivers/bus/ifpga/ diff --git a/drivers/bus/cdx/bus_cdx_driver.h b/drivers/bus/cdx/bus_cdx_driver.h new file mode 100644 index 0000000000..7edcb019eb --- /dev/null +++ b/drivers/bus/cdx/bus_cdx_driver.h @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc. + */ + +#ifndef _BUS_CDX_DRIVER_H_ +#define _BUS_CDX_DRIVER_H_ + +/** + * @file + * + * CDX bus interface + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <limits.h> +#include <errno.h> +#include <stdint.h> +#include <inttypes.h> + +#include <bus_driver.h> +#include <dev_driver.h> +#include <rte_debug.h> +#include <rte_interrupts.h> +#include <rte_dev.h> +#include <rte_bus.h> + +/* Forward declarations */ +struct rte_cdx_device; +struct rte_cdx_driver; + +#define CDX_MAX_RESOURCE 4 + +/** List of CDX devices */ +RTE_TAILQ_HEAD(rte_cdx_device_list, rte_cdx_device); +/** List of CDX drivers */ +RTE_TAILQ_HEAD(rte_cdx_driver_list, rte_cdx_driver); + +/* CDX Bus iterators */ +#define FOREACH_DEVICE_ON_CDXBUS(p) \ + RTE_TAILQ_FOREACH(p, &rte_cdx_bus.device_list, next) + +#define FOREACH_DRIVER_ON_CDXBUS(p) \ + RTE_TAILQ_FOREACH(p, &rte_cdx_bus.driver_list, next) + +/** Any CDX device identifier (vendor, device) */ +#define RTE_CDX_ANY_ID (0xffff) + +#define RTE_PMD_REGISTER_CDX_TABLE(name, table) \ +static const char DRV_EXP_TAG(name, cdx_tbl_export)[] __rte_used = \ +RTE_STR(table) + +/** + * A structure describing an ID for a CDX driver. Each driver provides a + * table of these IDs for each device that it supports. + */ +struct rte_cdx_id { + uint16_t vendor_id; /**< Vendor ID. */ + uint16_t device_id; /**< Device ID. */ +}; + +/** + * A structure describing a CDX device. + */ +struct rte_cdx_device { + RTE_TAILQ_ENTRY(rte_cdx_device) next; /**< Next probed CDX device. */ + struct rte_device device; /**< Inherit core device */ + struct rte_cdx_id id; /**< CDX ID. */ + struct rte_mem_resource mem_resource[CDX_MAX_RESOURCE]; + /**< CDX Memory Resource */ +}; + +/** + * @internal + * Helper macro for drivers that need to convert to struct rte_cdx_device. + */ +#define RTE_DEV_TO_CDX_DEV(ptr) \ + container_of(ptr, struct rte_cdx_device, device) + +#define RTE_DEV_TO_CDX_DEV_CONST(ptr) \ + container_of(ptr, const struct rte_cdx_device, device) + +#define RTE_ETH_DEV_TO_CDX_DEV(eth_dev) RTE_DEV_TO_CDX_DEV((eth_dev)->device) + +#ifdef __cplusplus +/** C++ macro used to help building up tables of device IDs */ +#define RTE_CDX_DEVICE(vend, dev) \ + (vend), \ + (dev) +#else +/** Macro used to help building up tables of device IDs */ +#define RTE_CDX_DEVICE(vend, dev) \ + .vendor_id = (vend), \ + .device_id = (dev) +#endif + +/** + * Initialisation function for the driver called during CDX probing. + */ +typedef int (rte_cdx_probe_t)(struct rte_cdx_driver *, struct rte_cdx_device *); + +/** + * Uninitialisation function for the driver called during hotplugging. + */ +typedef int (rte_cdx_remove_t)(struct rte_cdx_device *); + +/** + * A structure describing a CDX driver. + */ +struct rte_cdx_driver { + RTE_TAILQ_ENTRY(rte_cdx_driver) next; /**< Next in list. */ + struct rte_driver driver; /**< Inherit core driver. */ + struct rte_cdx_bus *bus; /**< CDX bus reference. */ + rte_cdx_probe_t *probe; /**< Device probe function. */ + rte_cdx_remove_t *remove; /**< Device remove function. */ + const struct rte_cdx_id *id_table; /**< ID table, NULL terminated. */ + uint32_t drv_flags; /**< Flags RTE_CDX_DRV_*. */ +}; + +/** + * Structure describing the CDX bus + */ +struct rte_cdx_bus { + struct rte_bus bus; /**< Inherit the generic class */ + struct rte_cdx_device_list device_list; /**< List of CDX devices */ + struct rte_cdx_driver_list driver_list; /**< List of CDX drivers */ +}; + +/** + * Get Pathname of CDX devices directory. + * + * @return + * sysfs path for CDX devices. + */ +__rte_internal +const char *rte_cdx_get_sysfs_path(void); + +/** + * Map the CDX device resources in user space virtual memory address + * + * @param dev + * A pointer to a rte_cdx_device structure describing the device + * to use + * + * @return + * 0 on success, negative on error and positive if no driver + * is found for the device. + */ +__rte_internal +int rte_cdx_map_device(struct rte_cdx_device *dev); + +/** + * Unmap this device + * + * @param dev + * A pointer to a rte_cdx_device structure describing the device + * to use + */ +__rte_internal +void rte_cdx_unmap_device(struct rte_cdx_device *dev); + +/** + * Register a CDX driver. + * + * @param driver + * A pointer to a rte_cdx_driver structure describing the driver + * to be registered. + */ +__rte_internal +void rte_cdx_register(struct rte_cdx_driver *driver); + +/** + * Helper for CDX device registration from driver (eth, crypto, raw) instance + */ +#define RTE_PMD_REGISTER_CDX(nm, cdx_drv) \ + RTE_INIT(cdxinitfn_ ##nm) \ + {\ + (cdx_drv).driver.name = RTE_STR(nm);\ + rte_cdx_register(&cdx_drv); \ + } \ + RTE_PMD_EXPORT_NAME(nm, __COUNTER__) + +/** + * Unregister a CDX driver. + * + * @param driver + * A pointer to a rte_cdx_driver structure describing the driver + * to be unregistered. + */ +__rte_internal +void rte_cdx_unregister(struct rte_cdx_driver *driver); + +#ifdef __cplusplus +} +#endif + +#endif /* _BUS_CDX_DRIVER_H_ */ diff --git a/drivers/bus/cdx/cdx.c b/drivers/bus/cdx/cdx.c new file mode 100644 index 0000000000..bb23b32312 --- /dev/null +++ b/drivers/bus/cdx/cdx.c @@ -0,0 +1,521 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc. + */ + +/* + * Architecture Overview + * ===================== + * CDX is a Hardware Architecture designed for AMD FPGA devices. It + * consists of sophisticated mechanism for interaction between FPGA, + * Firmware and the APUs (Application CPUs). + * + * Firmware resides on RPU (Realtime CPUs) which interacts with + * the FPGA program manager and the APUs. The RPU provides memory-mapped + * interface (RPU if) which is used to communicate with APUs. + * + * The diagram below shows an overview of the CDX architecture: + * + * +--------------------------------------+ + * | DPDK | + * | DPDK CDX drivers | + * | | | + * | DPDK CDX bus | + * | | | + * +-----------------------------|--------+ + * | + * +-----------------------------|--------+ + * | Application CPUs (APU) | | + * | | | + * | VFIO CDX driver | + * | Linux OS | | + * | Linux CDX bus | + * | | | + * +-----------------------------|--------+ + * | + * | + * +------------------------| RPU if |----+ + * | | | + * | V | + * | Realtime CPUs (RPU) | + * | | + * +--------------------------------------+ + * | + * +---------------------|----------------+ + * | FPGA | | + * | +-----------------------+ | + * | | | | | + * | +-------+ +-------+ +-------+ | + * | | dev 1 | | dev 2 | | dev 3 | | + * | +-------+ +-------+ +-------+ | + * +--------------------------------------+ + * + * The RPU firmware extracts the device information from the loaded FPGA + * image and implements a mechanism that allows the APU drivers to + * enumerate such devices (device personality and resource details) via + * a dedicated communication channel. + * + * VFIO CDX driver provides the CDX device resources like MMIO and interrupts + * to map to user-space. DPDK CDX bus uses sysfs interface and the vfio-cdx + * driver to discover and initialize the CDX devices for user-space + * applications. + */ + +#include <string.h> +#include <dirent.h> + +#include <rte_log.h> +#include <rte_bus.h> +#include <rte_eal_paging.h> +#include <rte_errno.h> +#include <rte_devargs.h> +#include <rte_malloc.h> +#include <rte_memcpy.h> +#include <rte_vfio.h> + +#include <eal_filesystem.h> + +#include "bus_cdx_driver.h" +#include "cdx_logs.h" +#include "private.h" + +#define SYSFS_CDX_DEVICES "/sys/bus/cdx/devices" +#define CDX_BUS_NAME cdx +#define CDX_DEV_PREFIX "cdx-" + +/** + * @file + * CDX probing using Linux sysfs. + */ + +/* Add a device to CDX bus */ +static void +cdx_add_device(struct rte_cdx_device *cdx_dev) +{ + TAILQ_INSERT_TAIL(&rte_cdx_bus.device_list, cdx_dev, next); +} + +static int +cdx_get_kernel_driver_by_path(const char *filename, char *driver_name, + size_t len) +{ + int count; + char path[PATH_MAX]; + char *name; + + if (!filename || !driver_name) + return -1; + + count = readlink(filename, path, PATH_MAX); + if (count >= PATH_MAX) + return -1; + + /* For device does not have a driver */ + if (count < 0) + return 1; + + path[count] = '\0'; + + name = strrchr(path, '/'); + if (name) { + strlcpy(driver_name, name + 1, len); + return 0; + } + + return -1; +} + +int rte_cdx_map_device(struct rte_cdx_device *dev) +{ + return cdx_vfio_map_resource(dev); +} + +void rte_cdx_unmap_device(struct rte_cdx_device *dev) +{ + cdx_vfio_unmap_resource(dev); +} + +static struct rte_devargs * +cdx_devargs_lookup(const char *dev_name) +{ + struct rte_devargs *devargs; + + RTE_EAL_DEVARGS_FOREACH("cdx", devargs) { + if (strcmp(devargs->name, dev_name) == 0) + return devargs; + } + return NULL; +} + +static bool +cdx_ignore_device(const char *dev_name) +{ + struct rte_devargs *devargs = cdx_devargs_lookup(dev_name); + + switch (rte_cdx_bus.bus.conf.scan_mode) { + case RTE_BUS_SCAN_ALLOWLIST: + if (devargs && devargs->policy == RTE_DEV_ALLOWED) + return false; + break; + case RTE_BUS_SCAN_UNDEFINED: + case RTE_BUS_SCAN_BLOCKLIST: + if (devargs == NULL || devargs->policy != RTE_DEV_BLOCKED) + return false; + break; + } + return true; +} + +/* + * Scan one cdx sysfs entry, and fill the devices list from it. + * It checks if the CDX device is bound to vfio-cdx driver. In case + * the device is vfio bound, it reads the vendor and device id and + * stores it for device-driver matching. + */ +static int +cdx_scan_one(const char *dirname, const char *dev_name) +{ + char filename[PATH_MAX]; + struct rte_cdx_device *dev = NULL; + char driver[PATH_MAX]; + unsigned long tmp; + char *name = NULL; + int ret; + + dev = calloc(1, sizeof(*dev)); + if (!dev) + return -ENOMEM; + + name = calloc(1, RTE_DEV_NAME_MAX_LEN); + if (!name) { + ret = -ENOMEM; + goto err; + } + + dev->device.bus = &rte_cdx_bus.bus; + memcpy(name, dev_name, RTE_DEV_NAME_MAX_LEN); + dev->device.name = name; + + /* parse driver */ + snprintf(filename, sizeof(filename), "%s/driver", dirname); + ret = cdx_get_kernel_driver_by_path(filename, driver, sizeof(driver)); + if (ret < 0) { + CDX_BUS_ERR("Fail to get kernel driver"); + ret = -1; + goto err; + } + + /* + * Check if device is bound to 'vfio-cdx' driver, so that user-space + * can gracefully access the device. + */ + if (ret || strcmp(driver, "vfio-cdx")) { + ret = 0; + goto err; + } + + /* get vendor id */ + snprintf(filename, sizeof(filename), "%s/vendor", dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + ret = -1; + goto err; + } + dev->id.vendor_id = (uint16_t)tmp; + + /* get device id */ + snprintf(filename, sizeof(filename), "%s/device", dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.device_id = (uint16_t)tmp; + + cdx_add_device(dev); + + return 0; + +err: + if (name) + free(name); + if (dev) + free(dev); + return ret; +} + +/* + * Scan the content of the CDX bus, and the devices in the devices + * list. + */ +static int +cdx_scan(void) +{ + struct dirent *e; + DIR *dir; + char dirname[PATH_MAX]; + + dir = opendir(rte_cdx_get_sysfs_path()); + if (dir == NULL) { + CDX_BUS_ERR("%s(): opendir failed: %s", __func__, + strerror(errno)); + return -1; + } + + while ((e = readdir(dir)) != NULL) { + if (e->d_name[0] == '.') + continue; + + if (cdx_ignore_device(e->d_name)) + continue; + + snprintf(dirname, sizeof(dirname), "%s/%s", + rte_cdx_get_sysfs_path(), e->d_name); + + if (cdx_scan_one(dirname, e->d_name) < 0) + goto error; + } + closedir(dir); + return 0; + +error: + closedir(dir); + return -1; +} + +const char * +rte_cdx_get_sysfs_path(void) +{ + return SYSFS_CDX_DEVICES; +} + +/* map a particular resource from a file */ +void * +cdx_map_resource(void *requested_addr, int fd, off_t offset, size_t size, + int additional_flags) +{ + void *mapaddr; + + /* Map the cdx MMIO memory resource of device */ + mapaddr = rte_mem_map(requested_addr, size, + RTE_PROT_READ | RTE_PROT_WRITE, + RTE_MAP_SHARED | additional_flags, fd, offset); + if (mapaddr == NULL) { + CDX_BUS_ERR("%s(): cannot map resource(%d, %p, 0x%zx, 0x%llx): %s (%p)", + __func__, fd, requested_addr, size, + (unsigned long long)offset, + rte_strerror(rte_errno), mapaddr); + } + CDX_BUS_DEBUG("CDX MMIO memory mapped at %p", mapaddr); + + return mapaddr; +} + +/* unmap a particular resource */ +void +cdx_unmap_resource(void *requested_addr, size_t size) +{ + if (requested_addr == NULL) + return; + + /* Unmap the CDX memory resource of device */ + if (rte_mem_unmap(requested_addr, size)) { + CDX_BUS_ERR("%s(): cannot mem unmap(%p, %#zx): %s", __func__, + requested_addr, size, rte_strerror(rte_errno)); + } + CDX_BUS_DEBUG("CDX memory unmapped at %p", requested_addr); +} +/* + * Match the CDX Driver and Device using device id and vendor id. + */ +static int +cdx_match(const struct rte_cdx_driver *cdx_drv, + const struct rte_cdx_device *cdx_dev) +{ + const struct rte_cdx_id *id_table; + + for (id_table = cdx_drv->id_table; id_table->vendor_id != 0; + id_table++) { + /* check if device's identifiers match the driver's ones */ + if (id_table->vendor_id != cdx_dev->id.vendor_id && + id_table->vendor_id != RTE_CDX_ANY_ID) + continue; + if (id_table->device_id != cdx_dev->id.device_id && + id_table->device_id != RTE_CDX_ANY_ID) + continue; + + return 1; + } + + return 0; +} + +/* + * If vendor id and device id match, call the probe() function of the + * driver. + */ +static int +cdx_probe_one_driver(struct rte_cdx_driver *dr, + struct rte_cdx_device *dev) +{ + const char *dev_name = dev->device.name; + bool already_probed; + int ret; + + if ((dr == NULL) || (dev == NULL)) + return -EINVAL; + + /* The device is not blocked; Check if driver supports it */ + if (!cdx_match(dr, dev)) + /* Match of device and driver failed */ + return 1; + + already_probed = rte_dev_is_probed(&dev->device); + if (already_probed) { + CDX_BUS_INFO("Device %s is already probed", dev->device.name); + return -EEXIST; + } + + CDX_BUS_DEBUG(" probe device %s using driver: %s", dev_name, + dr->driver.name); + + ret = cdx_vfio_map_resource(dev); + if (ret != 0) { + CDX_BUS_ERR("CDX map device failed: %d", ret); + goto error_map_device; + } + + /* call the driver probe() function */ + ret = dr->probe(dr, dev); + if (ret) { + CDX_BUS_ERR("Probe CDX driver: %s device: %s failed: %d", + dr->driver.name, dev_name, ret); + goto error_probe; + } else { + dev->device.driver = &dr->driver; + } + + return ret; + +error_probe: + cdx_vfio_unmap_resource(dev); +error_map_device: + return ret; +} + +/* + * If vendor/device ID match, call the probe() function of all + * registered driver for the given device. Return < 0 if initialization + * failed, return 1 if no driver is found for this device. + */ +static int +cdx_probe_all_drivers(struct rte_cdx_device *dev) +{ + struct rte_cdx_driver *dr = NULL; + int rc = 0; + + if (dev == NULL) + return -EINVAL; + + FOREACH_DRIVER_ON_CDXBUS(dr) { + rc = cdx_probe_one_driver(dr, dev); + if (rc < 0) + /* negative value is an error */ + return rc; + if (rc > 0) + /* positive value means driver doesn't support it */ + continue; + return 0; + } + return 1; +} + +/* + * Scan the content of the CDX bus, and call the probe() function for + * all registered drivers that have a matching entry in its id_table + * for discovered devices. + */ +static int +cdx_probe(void) +{ + struct rte_cdx_device *dev = NULL; + size_t probed = 0, failed = 0; + int ret = 0; + + FOREACH_DEVICE_ON_CDXBUS(dev) { + probed++; + + ret = cdx_probe_all_drivers(dev); + if (ret < 0) { + CDX_BUS_ERR("Requested device %s cannot be used", + dev->device.name); + rte_errno = errno; + failed++; + ret = 0; + } + } + + return (probed && probed == failed) ? -1 : 0; +} + +static int +cdx_parse(const char *name, void *addr) +{ + const char **out = addr; + int ret; + + ret = strncmp(name, CDX_DEV_PREFIX, strlen(CDX_DEV_PREFIX)); + + if (ret == 0 && addr) + *out = name; + + return ret; +} + +/* register a driver */ +void +rte_cdx_register(struct rte_cdx_driver *driver) +{ + TAILQ_INSERT_TAIL(&rte_cdx_bus.driver_list, driver, next); + driver->bus = &rte_cdx_bus; +} + +/* unregister a driver */ +void +rte_cdx_unregister(struct rte_cdx_driver *driver) +{ + TAILQ_REMOVE(&rte_cdx_bus.driver_list, driver, next); + driver->bus = NULL; +} + +static struct rte_device * +cdx_find_device(const struct rte_device *start, rte_dev_cmp_t cmp, + const void *data) +{ + const struct rte_cdx_device *cdx_start; + struct rte_cdx_device *cdx_dev; + + if (start != NULL) { + cdx_start = RTE_DEV_TO_CDX_DEV_CONST(start); + cdx_dev = TAILQ_NEXT(cdx_start, next); + } else { + cdx_dev = TAILQ_FIRST(&rte_cdx_bus.device_list); + } + while (cdx_dev != NULL) { + if (cmp(&cdx_dev->device, data) == 0) + return &cdx_dev->device; + cdx_dev = TAILQ_NEXT(cdx_dev, next); + } + return NULL; +} + +struct rte_cdx_bus rte_cdx_bus = { + .bus = { + .scan = cdx_scan, + .probe = cdx_probe, + .find_device = cdx_find_device, + .parse = cdx_parse, + }, + .device_list = TAILQ_HEAD_INITIALIZER(rte_cdx_bus.device_list), + .driver_list = TAILQ_HEAD_INITIALIZER(rte_cdx_bus.driver_list), +}; + +RTE_REGISTER_BUS(cdx, rte_cdx_bus.bus); +RTE_LOG_REGISTER_DEFAULT(cdx_logtype_bus, NOTICE); diff --git a/drivers/bus/cdx/cdx_logs.h b/drivers/bus/cdx/cdx_logs.h new file mode 100644 index 0000000000..e5e23d8b94 --- /dev/null +++ b/drivers/bus/cdx/cdx_logs.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc. + */ + +#ifndef _CDX_LOGS_H_ +#define _CDX_LOGS_H_ + +extern int cdx_logtype_bus; + +#define CDX_BUS_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, cdx_logtype_bus, "cdx: " fmt "\n", \ + ##args) + +/* Debug logs are with Function names */ +#define CDX_BUS_DEBUG(fmt, args...) \ + rte_log(RTE_LOG_DEBUG, cdx_logtype_bus, "cdx: %s(): " fmt "\n", \ + __func__, ##args) + +#define CDX_BUS_INFO(fmt, args...) \ + CDX_BUS_LOG(INFO, fmt, ## args) +#define CDX_BUS_ERR(fmt, args...) \ + CDX_BUS_LOG(ERR, fmt, ## args) +#define CDX_BUS_WARN(fmt, args...) \ + CDX_BUS_LOG(WARNING, fmt, ## args) + +/* DP Logs, toggled out at compile time if level lower than current level */ +#define CDX_BUS_DP_LOG(level, fmt, args...) \ + RTE_LOG_DP(level, PMD, fmt, ## args) + +#define CDX_BUS_DP_DEBUG(fmt, args...) \ + CDX_BUS_DP_LOG(DEBUG, fmt, ## args) +#define CDX_BUS_DP_INFO(fmt, args...) \ + CDX_BUS_DP_LOG(INFO, fmt, ## args) +#define CDX_BUS_DP_WARN(fmt, args...) \ + CDX_BUS_DP_LOG(WARNING, fmt, ## args) + +#endif /* _CDX_LOGS_H_ */ diff --git a/drivers/bus/cdx/cdx_vfio.c b/drivers/bus/cdx/cdx_vfio.c new file mode 100644 index 0000000000..ae11f589b3 --- /dev/null +++ b/drivers/bus/cdx/cdx_vfio.c @@ -0,0 +1,437 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc. + */ + +/** + * @file + * CDX probing using Linux VFIO. + * + * This code tries to determine if the CDX device is bound to VFIO driver, + * and initialize it (map MMIO regions, set up interrupts) if that's the case. + * + */ + +#include <string.h> +#include <fcntl.h> +#include <sys/eventfd.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <stdbool.h> + +#include <rte_log.h> +#include <rte_eal_paging.h> +#include <rte_malloc.h> +#include <rte_vfio.h> +#include <rte_eal.h> +#include <rte_bus.h> +#include <rte_spinlock.h> +#include <rte_tailq.h> + +#include "eal_filesystem.h" + +#include "bus_cdx_driver.h" +#include "cdx_logs.h" +#include "private.h" + +/** + * A structure describing a CDX mapping. + */ +struct cdx_map { + void *addr; + char *path; + uint64_t offset; + uint64_t size; +}; + +/** + * A structure describing a mapped CDX resource. + * For multi-process we need to reproduce all CDX mappings in secondary + * processes, so save them in a tailq. + */ +struct mapped_cdx_resource { + TAILQ_ENTRY(mapped_cdx_resource) next; + char name[RTE_DEV_NAME_MAX_LEN]; /**< CDX device name */ + char path[PATH_MAX]; + int nb_maps; + struct cdx_map maps[CDX_MAX_RESOURCE]; +}; + +/** mapped cdx device list */ +TAILQ_HEAD(mapped_cdx_res_list, mapped_cdx_resource); + +static struct rte_tailq_elem cdx_vfio_tailq = { + .name = "VFIO_CDX_RESOURCE_LIST", +}; +EAL_REGISTER_TAILQ(cdx_vfio_tailq) + +static struct mapped_cdx_resource * +cdx_vfio_find_and_unmap_resource(struct mapped_cdx_res_list *vfio_res_list, + struct rte_cdx_device *dev) +{ + struct mapped_cdx_resource *vfio_res = NULL; + const char *dev_name = dev->device.name; + struct cdx_map *maps; + int i; + + /* Get vfio_res */ + TAILQ_FOREACH(vfio_res, vfio_res_list, next) { + if (strcmp(vfio_res->name, dev_name)) + continue; + break; + } + + if (vfio_res == NULL) + return vfio_res; + + CDX_BUS_INFO("Releasing CDX mapped resource for %s", dev_name); + + maps = vfio_res->maps; + for (i = 0; i < vfio_res->nb_maps; i++) { + if (maps[i].addr) { + CDX_BUS_DEBUG("Calling cdx_unmap_resource for %s at %p", + dev_name, maps[i].addr); + cdx_unmap_resource(maps[i].addr, maps[i].size); + } + } + + return vfio_res; +} + +static int +cdx_vfio_unmap_resource_primary(struct rte_cdx_device *dev) +{ + char cdx_addr[PATH_MAX] = {0}; + struct mapped_cdx_resource *vfio_res = NULL; + struct mapped_cdx_res_list *vfio_res_list; + + vfio_res_list = + RTE_TAILQ_CAST(cdx_vfio_tailq.head, mapped_cdx_res_list); + vfio_res = cdx_vfio_find_and_unmap_resource(vfio_res_list, dev); + + /* if we haven't found our tailq entry, something's wrong */ + if (vfio_res == NULL) { + CDX_BUS_ERR("%s cannot find TAILQ entry for cdx device!", + cdx_addr); + return -1; + } + + TAILQ_REMOVE(vfio_res_list, vfio_res, next); + rte_free(vfio_res); + return 0; +} + +static int +cdx_vfio_unmap_resource_secondary(struct rte_cdx_device *dev) +{ + struct mapped_cdx_resource *vfio_res = NULL; + struct mapped_cdx_res_list *vfio_res_list; + + vfio_res_list = + RTE_TAILQ_CAST(cdx_vfio_tailq.head, mapped_cdx_res_list); + vfio_res = cdx_vfio_find_and_unmap_resource(vfio_res_list, dev); + + /* if we haven't found our tailq entry, something's wrong */ + if (vfio_res == NULL) { + CDX_BUS_ERR("%s cannot find TAILQ entry for CDX device!", + dev->device.name); + return -1; + } + + return 0; +} + +int +cdx_vfio_unmap_resource(struct rte_cdx_device *dev) +{ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + return cdx_vfio_unmap_resource_primary(dev); + else + return cdx_vfio_unmap_resource_secondary(dev); +} + +static int +cdx_rte_vfio_setup_device(int vfio_dev_fd) +{ + /* + * Reset the device. If the device is not capable of resetting, + * then it updates errno as EINVAL. + */ + if (ioctl(vfio_dev_fd, VFIO_DEVICE_RESET) && errno != EINVAL) { + CDX_BUS_ERR("Unable to reset device! Error: %d (%s)", errno, + strerror(errno)); + return -1; + } + + return 0; +} + +static int +cdx_vfio_mmap_resource(int vfio_dev_fd, struct mapped_cdx_resource *vfio_res, + int index, int additional_flags) +{ + struct cdx_map *map = &vfio_res->maps[index]; + void *vaddr; + + if (map->size == 0) { + CDX_BUS_DEBUG("map size is 0, skip region %d", index); + return 0; + } + + /* reserve the address using an inaccessible mapping */ + vaddr = mmap(map->addr, map->size, 0, MAP_PRIVATE | + MAP_ANONYMOUS | additional_flags, -1, 0); + if (vaddr != MAP_FAILED) { + void *map_addr = NULL; + + if (map->size) { + /* actual map of first part */ + map_addr = cdx_map_resource(vaddr, vfio_dev_fd, + map->offset, map->size, + RTE_MAP_FORCE_ADDRESS); + } + + if (map_addr == NULL) { + munmap(vaddr, map->size); + vaddr = MAP_FAILED; + CDX_BUS_ERR("Failed to map cdx MMIO region %d", index); + return -1; + } + } else { + CDX_BUS_ERR("Failed to create inaccessible mapping for MMIO region %d", + index); + return -1; + } + + map->addr = vaddr; + return 0; +} + +/* + * region info may contain capability headers, so we need to keep reallocating + * the memory until we match allocated memory size with argsz. + */ +static int +cdx_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info, + int region) +{ + struct vfio_region_info *ri; + size_t argsz = sizeof(*ri); + int ret; + + ri = malloc(sizeof(*ri)); + if (ri == NULL) { + CDX_BUS_ERR("Cannot allocate memory for VFIO region info"); + return -1; + } +again: + memset(ri, 0, argsz); + ri->argsz = argsz; + ri->index = region; + + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ri); + if (ret < 0) { + free(ri); + return ret; + } + if (ri->argsz != argsz) { + struct vfio_region_info *tmp; + + argsz = ri->argsz; + tmp = realloc(ri, argsz); + + if (tmp == NULL) { + /* realloc failed but the ri is still there */ + free(ri); + CDX_BUS_ERR("Cannot reallocate memory for VFIO region info"); + return -1; + } + ri = tmp; + goto again; + } + *info = ri; + + return 0; +} + +static int +find_max_end_va(const struct rte_memseg_list *msl, void *arg) +{ + size_t sz = msl->len; + void *end_va = RTE_PTR_ADD(msl->base_va, sz); + void **max_va = arg; + + if (*max_va < end_va) + *max_va = end_va; + return 0; +} + +static void * +cdx_find_max_end_va(void) +{ + void *va = NULL; + + rte_memseg_list_walk(find_max_end_va, &va); + return va; +} + +static int +cdx_vfio_map_resource_primary(struct rte_cdx_device *dev) +{ + struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; + char cdx_addr[PATH_MAX] = {0}; + static void *cdx_map_addr; + struct mapped_cdx_resource *vfio_res = NULL; + struct mapped_cdx_res_list *vfio_res_list = + RTE_TAILQ_CAST(cdx_vfio_tailq.head, mapped_cdx_res_list); + const char *dev_name = dev->device.name; + struct cdx_map *maps; + int vfio_dev_fd, i, ret; + + ret = rte_vfio_setup_device(rte_cdx_get_sysfs_path(), dev_name, + &vfio_dev_fd, &device_info); + if (ret) + return ret; + + /* allocate vfio_res and get region info */ + vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0); + if (vfio_res == NULL) { + CDX_BUS_ERR("Cannot store VFIO mmap details"); + goto err_vfio_dev_fd; + } + memcpy(vfio_res->name, dev_name, RTE_DEV_NAME_MAX_LEN); + + /* get number of registers */ + vfio_res->nb_maps = device_info.num_regions; + + /* map memory regions */ + maps = vfio_res->maps; + + for (i = 0; i < vfio_res->nb_maps; i++) { + struct vfio_region_info *reg = NULL; + void *vaddr; + + ret = cdx_vfio_get_region_info(vfio_dev_fd, ®, i); + if (ret < 0) { + CDX_BUS_ERR("%s cannot get device region info error %i (%s)", + dev_name, errno, strerror(errno)); + goto err_vfio_res; + } + + /* skip non-mmappable regions */ + if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) { + free(reg); + continue; + } + + /* try mapping somewhere close to the end of hugepages */ + if (cdx_map_addr == NULL) + cdx_map_addr = cdx_find_max_end_va(); + + vaddr = cdx_map_addr; + cdx_map_addr = RTE_PTR_ADD(vaddr, (size_t)reg->size); + + cdx_map_addr = RTE_PTR_ALIGN(cdx_map_addr, + sysconf(_SC_PAGE_SIZE)); + + maps[i].addr = vaddr; + maps[i].offset = reg->offset; + maps[i].size = reg->size; + maps[i].path = NULL; /* vfio doesn't have per-resource paths */ + + ret = cdx_vfio_mmap_resource(vfio_dev_fd, vfio_res, i, 0); + if (ret < 0) { + CDX_BUS_ERR("%s mapping region %i failed: %s", + cdx_addr, i, strerror(errno)); + free(reg); + goto err_vfio_res; + } + + dev->mem_resource[i].addr = maps[i].addr; + dev->mem_resource[i].len = maps[i].size; + + free(reg); + } + + if (cdx_rte_vfio_setup_device(vfio_dev_fd) < 0) { + CDX_BUS_ERR("%s setup device failed", dev_name); + goto err_vfio_res; + } + + TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next); + + return 0; +err_vfio_res: + cdx_vfio_find_and_unmap_resource(vfio_res_list, dev); + rte_free(vfio_res); +err_vfio_dev_fd: + rte_vfio_release_device(rte_cdx_get_sysfs_path(), + dev_name, vfio_dev_fd); + return -1; +} + +static int +cdx_vfio_map_resource_secondary(struct rte_cdx_device *dev) +{ + struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; + char cdx_addr[PATH_MAX] = {0}; + int vfio_dev_fd; + int i, ret; + struct mapped_cdx_resource *vfio_res = NULL; + struct mapped_cdx_res_list *vfio_res_list = + RTE_TAILQ_CAST(cdx_vfio_tailq.head, mapped_cdx_res_list); + const char *dev_name = dev->device.name; + struct cdx_map *maps; + + /* if we're in a secondary process, just find our tailq entry */ + TAILQ_FOREACH(vfio_res, vfio_res_list, next) { + if (strcmp(vfio_res->name, dev_name)) + continue; + break; + } + /* if we haven't found our tailq entry, something's wrong */ + if (vfio_res == NULL) { + CDX_BUS_ERR("%s cannot find TAILQ entry for cdx device!", + dev_name); + return -1; + } + + ret = rte_vfio_setup_device(rte_cdx_get_sysfs_path(), dev_name, + &vfio_dev_fd, &device_info); + if (ret) + return ret; + + /* map MMIO regions */ + maps = vfio_res->maps; + + for (i = 0; i < vfio_res->nb_maps; i++) { + ret = cdx_vfio_mmap_resource(vfio_dev_fd, vfio_res, i, MAP_FIXED); + if (ret < 0) { + CDX_BUS_ERR("%s mapping MMIO region %i failed: %s", + dev_name, i, strerror(errno)); + goto err_vfio_dev_fd; + } + + dev->mem_resource[i].addr = maps[i].addr; + dev->mem_resource[i].len = maps[i].size; + } + + return 0; +err_vfio_dev_fd: + rte_vfio_release_device(rte_cdx_get_sysfs_path(), + cdx_addr, vfio_dev_fd); + return -1; +} + +/* + * map the CDX resources of a CDX device in virtual memory (VFIO version). + * primary and secondary processes follow almost exactly the same path + */ +int +cdx_vfio_map_resource(struct rte_cdx_device *dev) +{ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + return cdx_vfio_map_resource_primary(dev); + else + return cdx_vfio_map_resource_secondary(dev); +} diff --git a/drivers/bus/cdx/meson.build b/drivers/bus/cdx/meson.build new file mode 100644 index 0000000000..f2ca104d34 --- /dev/null +++ b/drivers/bus/cdx/meson.build @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (C) 2022-2023, Advanced Micro Devices, Inc. + +if not is_linux + build = false + reason = 'only supported on Linux' +endif + +driver_sdk_headers = files('bus_cdx_driver.h') +sources = files( + 'cdx.c', + 'cdx_vfio.c', +) diff --git a/drivers/bus/cdx/private.h b/drivers/bus/cdx/private.h new file mode 100644 index 0000000000..03a5093473 --- /dev/null +++ b/drivers/bus/cdx/private.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc. + */ + +#ifndef _CDX_PRIVATE_H_ +#define _CDX_PRIVATE_H_ + +#include <stdbool.h> +#include <stdio.h> + +#include "bus_cdx_driver.h" + +extern struct rte_cdx_bus rte_cdx_bus; + +/** + * Map a particular resource from a file. + * + * @param requested_addr + * The starting address for the new mapping range. + * @param fd + * The file descriptor. + * @param offset + * The offset for the mapping range. + * @param size + * The size for the mapping range. + * @param additional_flags + * The additional rte_mem_map() flags for the mapping range. + * @return + * - On success, the function returns a pointer to the mapped area. + * - On error, NULL is returned. + */ +void *cdx_map_resource(void *requested_addr, int fd, off_t offset, + size_t size, int additional_flags); + +/** + * Unmap a particular resource. + * + * @param requested_addr + * The address for the unmapping range. + * @param size + * The size for the unmapping range. + */ +void cdx_unmap_resource(void *requested_addr, size_t size); + +/* map/unmap VFIO resource */ +int cdx_vfio_map_resource(struct rte_cdx_device *dev); +int cdx_vfio_unmap_resource(struct rte_cdx_device *dev); + +#endif /* _CDX_PRIVATE_H_ */ diff --git a/drivers/bus/cdx/version.map b/drivers/bus/cdx/version.map new file mode 100644 index 0000000000..957fcab978 --- /dev/null +++ b/drivers/bus/cdx/version.map @@ -0,0 +1,11 @@ +INTERNAL { + global: + + rte_cdx_get_sysfs_path; + rte_cdx_map_device; + rte_cdx_register; + rte_cdx_unmap_device; + rte_cdx_unregister; + + local: *; +}; -- 2.17.1