From: Yahui Cao <yahui....@intel.com> IOMMUFD is a new standalone IOMMU subsystem introduced in Linux.
Linux now includes multiple device-passthrough frameworks (e.g. VFIO and vDPA), and each of those frameworks implements its own logic for managing I/O page tables, which makes it hard to scale to modern IOMMU features such as PASID, I/O page faults and IOMMU dirty page tracking. The goal of IOMMUFD is to let Linux subsystems like VFIO and vDPA consume a unified IOMMU framework. This patch exports a basic enable function, a default isolation domain and a per-IOMMUFD DMA mapping function. An IOMMUFD consumer should use the default isolation domain and the DMA mapping function when user-initiated DMA is required. Signed-off-by: Yahui Cao <yahui....@intel.com> Signed-off-by: Beilei Xing <beilei.x...@intel.com> --- config/meson.build | 3 + config/rte_config.h | 1 + lib/eal/include/rte_iommufd.h | 73 ++++++++++++++ lib/eal/linux/eal.c | 22 ++++ lib/eal/linux/eal_iommufd.c | 183 ++++++++++++++++++++++++++++++++++ lib/eal/linux/eal_iommufd.h | 43 ++++++++ lib/eal/linux/meson.build | 1 + lib/eal/version.map | 3 + 8 files changed, 329 insertions(+) create mode 100644 lib/eal/include/rte_iommufd.h create mode 100644 lib/eal/linux/eal_iommufd.c create mode 100644 lib/eal/linux/eal_iommufd.h diff --git a/config/meson.build b/config/meson.build index a9ccd56deb..93c63984c8 100644 --- a/config/meson.build +++ b/config/meson.build @@ -442,6 +442,9 @@ install_headers(['rte_config.h'], # enable VFIO only if it is linux OS dpdk_conf.set('RTE_EAL_VFIO', is_linux) +# enable IOMMUFD only if it is linux OS +dpdk_conf.set('RTE_EAL_IOMMUFD', is_linux) + # specify -D_GNU_SOURCE unconditionally add_project_arguments('-D_GNU_SOURCE', language: 'c') diff --git a/config/rte_config.h b/config/rte_config.h index da265d7dd2..25a6dccd8f 100644 --- a/config/rte_config.h +++ b/config/rte_config.h @@ -38,6 +38,7 @@ #define RTE_MAX_TAILQ 32 #define RTE_LOG_DP_LEVEL RTE_LOG_INFO #define RTE_MAX_VFIO_CONTAINERS 64 +#define RTE_MAX_IOMMUFD_FD 1 /* bsd module defines */ #define RTE_CONTIGMEM_MAX_NUM_BUFS 64 diff --git 
a/lib/eal/include/rte_iommufd.h b/lib/eal/include/rte_iommufd.h new file mode 100644 index 0000000000..ac42713018 --- /dev/null +++ b/lib/eal/include/rte_iommufd.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2023 Intel Corporation + */ + +#ifndef _RTE_IOMMUFD_H_ +#define _RTE_IOMMUFD_H_ + +/** + * @file + * RTE IOMMUFD. This library provides various IOMMUFD related utility functions. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdbool.h> +#include <stdint.h> + +#include <rte_compat.h> +/* + * determine if IOMMUFD is present on the system + */ +#if !defined(IOMMUFD_PRESENT) && defined(RTE_EAL_IOMMUFD) +#include <linux/version.h> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 6, 0) +#define IOMMUFD_PRESENT +#endif /* kernel version >= 6.6.0 */ +#endif /* RTE_EAL_IOMMUFD */ + +#ifdef IOMMUFD_PRESENT + +#define IOMMUFD_PATH "/dev/iommu" + +#else /* not IOMMUFD_PRESENT */ +#endif /* IOMMUFD_PRESENT */ + +/** + * Enable an IOMMUFD-related kmod. + * + * This function is only relevant to Linux and will return + * an error on BSD. + * + * @param modname + * kernel module name. + * + * @return + * 0 on success. + * <0 on failure. + */ +__rte_experimental +int rte_iommufd_enable(const char *modname); + +/** + * Check whether an IOMMUFD-related kmod is enabled. + * + * This function is only relevant to Linux. + * + * @param modname + * kernel module name. + * + * @return + * 1 if true. + * 0 otherwise. 
+ */ +__rte_experimental +int rte_iommufd_is_enabled(const char *modname); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_IOMMUFD_H_ */ diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c index 57da058cec..4c8e0a7b6e 100644 --- a/lib/eal/linux/eal.c +++ b/lib/eal/linux/eal.c @@ -41,6 +41,7 @@ #include <rte_version.h> #include <malloc_heap.h> #include <rte_vfio.h> +#include <rte_iommufd.h> #include <telemetry_internal.h> #include "eal_private.h" @@ -52,6 +53,7 @@ #include "eal_trace.h" #include "eal_options.h" #include "eal_vfio.h" +#include "eal_iommufd.h" #include "hotplug_mp.h" #include "log_internal.h" @@ -877,6 +879,16 @@ static int rte_eal_vfio_setup(void) } #endif +#ifdef IOMMUFD_PRESENT +static int rte_eal_iommufd_setup(void) +{ + if (rte_iommufd_enable("iommufd")) + return -1; + + return 0; +} +#endif + static void rte_eal_init_alert(const char *msg) { fprintf(stderr, "EAL: FATAL: %s\n", msg); @@ -1162,6 +1174,16 @@ rte_eal_init(int argc, char **argv) return -1; } #endif + +#ifdef IOMMUFD_PRESENT + if (rte_eal_iommufd_setup() < 0) { + rte_eal_init_alert("Cannot init IOMMUFD"); + rte_errno = EAGAIN; + rte_atomic_store_explicit(&run_once, 0, rte_memory_order_relaxed); + return -1; + } +#endif + /* in secondary processes, memory init may allocate additional fbarrays * not present in primary processes, so to avoid any potential issues, * initialize memzones first. 
diff --git a/lib/eal/linux/eal_iommufd.c b/lib/eal/linux/eal_iommufd.c new file mode 100644 index 0000000000..8866aa60c1 --- /dev/null +++ b/lib/eal/linux/eal_iommufd.c @@ -0,0 +1,183 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2023 Intel Corporation + */ + +#include <inttypes.h> +#include <sys/ioctl.h> +#include <fcntl.h> + +#include <rte_iommufd.h> +#include <rte_spinlock.h> +#include <rte_errno.h> + +#include "eal_iommufd.h" +#include "eal_private.h" + +#ifdef IOMMUFD_PRESENT +#include <linux/iommufd.h> + +/* per-process IOMMUFD config */ +static struct iommufd_config iommufd_cfgs[IOMMUFD_MAX_FD]; +struct iommufd_config *default_iommufd_cfg = &iommufd_cfgs[0]; + +static void +iommufd_get_ioas(int *iommufd, uint32_t *ioas_id) +{ + int iommu_fd, ret; + struct iommu_ioas_alloc alloc_data = {}; + + *iommufd = -1; + iommu_fd = open(IOMMUFD_PATH, O_RDWR); + if (iommu_fd < 0) { + RTE_LOG(ERR, EAL, "Failed to open iommufd!\n"); + return; + } + + alloc_data.size = sizeof(alloc_data); + ret = ioctl(iommu_fd, IOMMU_IOAS_ALLOC, &alloc_data); + if (ret) { + RTE_LOG(ERR, EAL, "Failed to alloc ioas!\n"); + return; + } + + *iommufd = iommu_fd; + *ioas_id = alloc_data.out_ioas_id; +} + +int +rte_iommufd_enable(const char *modname) +{ + /* initialize device list */ + int i; + int iommufd_available; + const struct internal_config *internal_conf = + eal_get_internal_configuration(); + + for (i = 0; i < IOMMUFD_MAX_FD; i++) { + iommufd_cfgs[i].iommufd_enabled = 0; + iommufd_cfgs[i].iommufd = -1; + iommufd_cfgs[i].ioas_id = 0; + iommufd_cfgs[i].dma_init = false; + } + + RTE_LOG(DEBUG, EAL, "Probing IOMMUFD support...\n"); + + /* check if iommufd module is loaded */ + iommufd_available = rte_eal_check_module(modname); + + /* return error directly */ + if (iommufd_available == -1) { + RTE_LOG(INFO, EAL, "Could not get loaded module details!\n"); + return -1; + } + + /* return 0 if IOMMUFD modules not loaded */ + if (iommufd_available == 0) { + RTE_LOG(DEBUG, EAL, + 
"IOMMUFD modules not loaded, skipping IOMMUFD support...\n"); + return 0; + } + + if (internal_conf->process_type == RTE_PROC_PRIMARY) + iommufd_get_ioas(&default_iommufd_cfg->iommufd, &default_iommufd_cfg->ioas_id); + + /* check if we have IOMMUFD driver enabled */ + if (default_iommufd_cfg->iommufd != -1) { + RTE_LOG(INFO, EAL, "IOMMUFD support initialized\n"); + default_iommufd_cfg->iommufd_enabled = 1; + } else { + RTE_LOG(NOTICE, EAL, "IOMMUFD support could not be initialized\n"); + } + + return 0; +} + +int +rte_iommufd_is_enabled(const char *modname) +{ + const int mod_available = rte_eal_check_module(modname) > 0; + return default_iommufd_cfg->iommufd_enabled && mod_available; +} + +int +iommufd_dma_mem_map(int iommufd, uint32_t ioasid, uint64_t vaddr, + uint64_t iova, uint64_t len, int do_map) +{ + struct iommu_ioas_map dma_map; + struct iommu_ioas_unmap dma_unmap; + int ret; + + if (do_map != 0) { + memset(&dma_map, 0, sizeof(dma_map)); + dma_map.ioas_id = ioasid; + dma_map.size = sizeof(struct iommu_ioas_map); + dma_map.user_va = vaddr; + dma_map.length = len; + dma_map.iova = iova; + dma_map.flags = IOMMU_IOAS_MAP_READABLE | + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_FIXED_IOVA; + + ret = ioctl(iommufd, IOMMU_IOAS_MAP, &dma_map); + if (ret) { + /** + * In case the mapping was already done EEXIST will be + * returned from kernel. 
+ */ + if (errno == EEXIST) { + RTE_LOG(DEBUG, EAL, + "Memory segment is already mapped, skipping"); + } else { + RTE_LOG(ERR, EAL, + "Cannot set up DMA remapping, error " + "%i (%s)\n", errno, strerror(errno)); + return -1; + } + } + } else { + memset(&dma_unmap, 0, sizeof(dma_unmap)); + dma_unmap.ioas_id = ioasid; + dma_unmap.size = sizeof(struct iommu_ioas_unmap); + dma_unmap.length = len; + dma_unmap.iova = iova; + + ret = ioctl(iommufd, IOMMU_IOAS_UNMAP, &dma_unmap); + if (ret) { + RTE_LOG(ERR, EAL, "Cannot clear DMA remapping, error " + "%i (%s)\n", errno, strerror(errno)); + return -1; + } else if (dma_unmap.length != len) { + RTE_LOG(ERR, EAL, "Unexpected size %"PRIu64 + " of DMA remapping cleared instead of %"PRIu64"\n", + (uint64_t)dma_unmap.size, len); + rte_errno = EIO; + return -1; + } + } + + return 0; +} + +#else /* not IOMMUFD_PRESENT */ + +int +rte_iommufd_enable(__rte_unused const char *modname) +{ + return -1; +} + +int +rte_iommufd_is_enabled(__rte_unused const char *modname) +{ + return -1; +} + +int +iommufd_dma_mem_map(__rte_unused int iommufd, __rte_unused uint32_t ioasid, + __rte_unused uint64_t vaddr, __rte_unused uint64_t iova, + __rte_unused uint64_t len, __rte_unused int do_map) +{ + return -1; +} + +#endif /* IOMMUFD_PRESENT */ diff --git a/lib/eal/linux/eal_iommufd.h b/lib/eal/linux/eal_iommufd.h new file mode 100644 index 0000000000..d9b67a7fd9 --- /dev/null +++ b/lib/eal/linux/eal_iommufd.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2023 Intel Corporation + */ + +#ifndef EAL_IOMMUFD_H_ +#define EAL_IOMMUFD_H_ + +#include <rte_common.h> +#include <stdbool.h> + +/* + * determine if IOMMUFD is present on the system + */ +#if !defined(IOMMUFD_PRESENT) && defined(RTE_EAL_IOMMUFD) +#include <linux/version.h> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 6, 0) +#define IOMMUFD_PRESENT +#else +#pragma message("IOMMUFD configured but not supported by this kernel, disabling.") +#endif /* kernel version >= 6.6.0 
*/ +#endif /* RTE_EAL_IOMMUFD */ + +#ifdef IOMMUFD_PRESENT + +#define IOMMUFD_MAX_FD RTE_MAX_IOMMUFD_FD + +struct iommufd_config { + int iommufd_enabled; + int iommufd; + uint32_t ioas_id; + bool dma_init; +}; + +/* per-process IOMMUFD config */ +extern struct iommufd_config *default_iommufd_cfg; + +#endif /* IOMMUFD_PRESENT */ + +int +iommufd_dma_mem_map(int iommufd, uint32_t ioasid, uint64_t vaddr, + uint64_t iova, uint64_t len, int do_map); + +#endif /* EAL_IOMMUFD_H_ */ diff --git a/lib/eal/linux/meson.build b/lib/eal/linux/meson.build index e99ebed256..8081087584 100644 --- a/lib/eal/linux/meson.build +++ b/lib/eal/linux/meson.build @@ -16,6 +16,7 @@ sources += files( 'eal_thread.c', 'eal_timer.c', 'eal_vfio.c', + 'eal_iommufd.c', 'eal_vfio_mp_sync.c', ) diff --git a/lib/eal/version.map b/lib/eal/version.map index 5e0cd47c82..30e66a7267 100644 --- a/lib/eal/version.map +++ b/lib/eal/version.map @@ -393,6 +393,9 @@ EXPERIMENTAL { # added in 23.07 rte_memzone_max_get; rte_memzone_max_set; + + rte_iommufd_enable; # WINDOWS_NO_EXPORT + rte_iommufd_is_enabled; # WINDOWS_NO_EXPORT }; INTERNAL { -- 2.34.1