>-----Original Message----- >From: Cédric Le Goater <c...@redhat.com> >Sent: Tuesday, November 7, 2023 9:33 PM >Subject: Re: [PATCH v4 26/41] backends/iommufd: Introduce the iommufd object > >On 11/2/23 08:12, Zhenzhong Duan wrote: >> From: Eric Auger <eric.au...@redhat.com> [...] >> diff --git a/qapi/qom.json b/qapi/qom.json >> index c53ef978ff..27300add48 100644 >> --- a/qapi/qom.json >> +++ b/qapi/qom.json >> @@ -794,6 +794,24 @@ >> { 'struct': 'VfioUserServerProperties', >> 'data': { 'socket': 'SocketAddress', 'device': 'str' } } >> >> +## >> +# @IOMMUFDProperties: >> +# >> +# Properties for iommufd objects. >> +# >> +# @fd: file descriptor name previously passed via 'getfd' command, >> +# which represents a pre-opened /dev/iommu. This allows the >> +# iommufd object to be shared accross several subsystems >> +# (VFIO, VDPA, ...), and the file descriptor to be shared >> +# with other process, e.g. DPDK. (default: QEMU opens >> +# /dev/iommu by itself) >> +# >> +# Since: 8.2 >> +## >> +{ 'struct': 'IOMMUFDProperties', >> + 'data': { '*fd': 'str' }, >> + 'if': 'CONFIG_IOMMUFD' } > > >Activating or not IOMMUFD on a platform is a configuration choice >and it is not a dependency on an external resource. I would make >things simpler and drop all the #ifdef in the documentation files.
Yes, that will be cleaner. > >There might be a way to remove the documentation also. Not a big >issue for now. [...] >> diff --git a/backends/iommufd-stub.c b/backends/iommufd-stub.c > >I don't think this stub file is needed. Please drop. Will do. > >> new file mode 100644 >> index 0000000000..02ac844c17 >> --- /dev/null >> +++ b/backends/iommufd-stub.c >> @@ -0,0 +1,59 @@ >> +/* >> + * iommufd container backend stub >> + * >> + * Copyright (C) 2023 Intel Corporation. >> + * Copyright Red Hat, Inc. 2023 >> + * >> + * Authors: Yi Liu <yi.l....@intel.com> >> + * Eric Auger <eric.au...@redhat.com> >> + * >> + * This program is free software; you can redistribute it and/or modify >> + * it under the terms of the GNU General Public License as published by >> + * the Free Software Foundation; either version 2 of the License, or >> + * (at your option) any later version. >> + >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. >> + >> + * You should have received a copy of the GNU General Public License along >> + * with this program; if not, see <http://www.gnu.org/licenses/>. >> + */ >> + >> +#include "qemu/osdep.h" >> +#include "sysemu/iommufd.h" >> +#include "qemu/error-report.h" >> + >> +int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) >> +{ >> + return 0; >> +} >> +void iommufd_backend_disconnect(IOMMUFDBackend *be) >> +{ >> +} >> +void iommufd_backend_free_id(int fd, uint32_t id) >> +{ >> +} >> +int iommufd_backend_get_ioas(IOMMUFDBackend *be, uint32_t *ioas_id) >> +{ >> + return 0; >> +} >> +void iommufd_backend_put_ioas(IOMMUFDBackend *be, uint32_t ioas_id) >> +{ >> +} >> +int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, >hwaddr iova, >> + ram_addr_t size, void *vaddr, bool readonly) >> +{ >> + return 0; >> +} >> +int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, >> + hwaddr iova, ram_addr_t size) >> +{ >> + return 0; >> +} >> +int iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, >> + uint32_t pt_id, uint32_t *out_hwpt) >> +{ >> + return 0; >> +} >> diff --git a/backends/iommufd.c b/backends/iommufd.c >> new file mode 100644 >> index 0000000000..a526d58824 >> --- /dev/null >> +++ b/backends/iommufd.c >> @@ -0,0 +1,257 @@ >> +/* >> + * iommufd container backend >> + * >> + * Copyright (C) 2023 Intel Corporation. >> + * Copyright Red Hat, Inc. 2023 >> + * >> + * Authors: Yi Liu <yi.l....@intel.com> >> + * Eric Auger <eric.au...@redhat.com> >> + * >> + * SPDX-License-Identifier: GPL-2.0-or-later >> + */ >> + >> +#include "qemu/osdep.h" >> +#include "sysemu/iommufd.h" >> +#include "qapi/error.h" >> +#include "qapi/qmp/qerror.h" >> +#include "qemu/module.h" >> +#include "qom/object_interfaces.h" >> +#include "qemu/error-report.h" >> +#include "monitor/monitor.h" >> +#include "trace.h" >> +#include <sys/ioctl.h> >> +#include <linux/iommufd.h> >> + >> +static void iommufd_backend_init(Object *obj) >> +{ >> + IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); >> + >> + be->fd = -1; >> + be->users = 0; >> + be->owned = true; >> + qemu_mutex_init(&be->lock); >> +} >> + >> +static void iommufd_backend_finalize(Object *obj) >> +{ >> + IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); >> + >> + if (be->owned) { >> + close(be->fd); >> + be->fd = -1; >> + } >> +} >> + >> +static void iommufd_backend_set_fd(Object *obj, const char *str, Error >> **errp) >> +{ >> + IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); >> + int fd = -1; >> + >> + fd = monitor_fd_param(monitor_cur(), str, errp); >> + if (fd == -1) { >> + error_prepend(errp, "Could not parse remote object fd %s:", str); >> + return; >> + } >> + qemu_mutex_lock(&be->lock); >> + be->fd = fd; >> + be->owned = false; >> + qemu_mutex_unlock(&be->lock); >> + trace_iommu_backend_set_fd(be->fd); >> +} >> + >> +static void iommufd_backend_class_init(ObjectClass *oc, void *data) >> +{ >> + object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); >> +} >> + >> +int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) >> +{ >> + int fd, ret = 0; >> + >> + qemu_mutex_lock(&be->lock); >> + if (be->users == UINT32_MAX) { >> + error_setg(errp, "too many connections"); >> + ret = -E2BIG; >> + goto out; >> + } >> + if (be->owned && !be->users) { >> + fd = qemu_open_old("/dev/iommu", O_RDWR); >> + if (fd < 0) { >> + error_setg_errno(errp, errno, "/dev/iommu opening failed"); >> + ret = fd; >> + goto out; >> + } >> + be->fd = fd; >> + } >> + be->users++; >> +out: >> + trace_iommufd_backend_connect(be->fd, be->owned, >> + be->users, ret); >> + qemu_mutex_unlock(&be->lock); >> + return ret; >> +} >> + >> +void iommufd_backend_disconnect(IOMMUFDBackend *be) >> +{ >> + qemu_mutex_lock(&be->lock); >> + if (!be->users) { >> + goto out; >> + } >> + be->users--; >> + if (!be->users && be->owned) { >> + close(be->fd); >> + be->fd = -1; >> + } >> +out: >> + trace_iommufd_backend_disconnect(be->fd, be->users); >> + qemu_mutex_unlock(&be->lock); >> +} >> + >> +static int iommufd_backend_alloc_ioas(int fd, uint32_t *ioas_id) >> +{ >> + int ret; >> + struct iommu_ioas_alloc alloc_data = { >> + .size = sizeof(alloc_data), >> + .flags = 0, >> + }; >> + >> + ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data); >> + if (ret) { >> + error_report("Failed to allocate ioas %m"); >> + } >> + >> + *ioas_id = alloc_data.out_ioas_id; >> + trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret); >> + >> + return ret; >> +} >> + >> +void iommufd_backend_free_id(int fd, uint32_t id) >> +{ >> + int ret; >> + struct iommu_destroy des = { >> + .size = sizeof(des), >> + .id = id, >> + }; >> + >> + ret = ioctl(fd, IOMMU_DESTROY, &des); >> + trace_iommufd_backend_free_id(fd, id, ret); >> + if (ret) { >> + error_report("Failed to free id: %u %m", id); >> + } >> +} >> + >> +int iommufd_backend_get_ioas(IOMMUFDBackend *be, uint32_t *ioas_id) >> +{ >> + int ret; >> + >> + ret = iommufd_backend_alloc_ioas(be->fd, ioas_id); >> + trace_iommufd_backend_get_ioas(be->fd, *ioas_id, ret); >> + return ret; >> +} >> + >> +void iommufd_backend_put_ioas(IOMMUFDBackend *be, uint32_t ioas_id) >> +{ >> + iommufd_backend_free_id(be->fd, ioas_id); >> + trace_iommufd_backend_put_ioas(be->fd, ioas_id); >> +} >> + >> +int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, >hwaddr iova, >> + ram_addr_t size, void *vaddr, bool readonly) >> +{ >> + int ret; >> + struct iommu_ioas_map map = { >> + .size = sizeof(map), >> + .flags = IOMMU_IOAS_MAP_READABLE | >> + IOMMU_IOAS_MAP_FIXED_IOVA, >> + .ioas_id = ioas_id, >> + .__reserved = 0, >> + .user_va = (uintptr_t)vaddr, >> + .iova = iova, >> + .length = size, >> + }; >> + >> + if (!readonly) { >> + map.flags |= IOMMU_IOAS_MAP_WRITEABLE; >> + } >> + >> + ret = ioctl(be->fd, IOMMU_IOAS_MAP, &map); >> + trace_iommufd_backend_map_dma(be->fd, ioas_id, iova, size, >> + vaddr, readonly, ret); >> + if (ret) { >> + error_report("IOMMU_IOAS_MAP failed: %m"); >> + } >> + return !ret ? 0 : -errno; >> +} >> + >> +int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, >> + hwaddr iova, ram_addr_t size) >> +{ >> + int ret; >> + struct iommu_ioas_unmap unmap = { >> + .size = sizeof(unmap), >> + .ioas_id = ioas_id, >> + .iova = iova, >> + .length = size, >> + }; >> + >> + ret = ioctl(be->fd, IOMMU_IOAS_UNMAP, &unmap); >> + trace_iommufd_backend_unmap_dma(be->fd, ioas_id, iova, size, ret); >> + /* >> + * TODO: IOMMUFD doesn't support mapping PCI BARs for now. >> + * It's not a problem if there is no p2p dma, relax it here >> + * and avoid many noisy trigger from vIOMMU side. > >Should we add a warn_report() ? The purpose of checking "ret && errno == ENOENT" is to avoid many error_report() for PCI BARs, If we add warn_report(), there will still be many print for PCI BARs. > >> + */ >> + if (ret && errno == ENOENT) { >> + ret = 0; >> + } >> + if (ret) { >> + error_report("IOMMU_IOAS_UNMAP failed: %m"); >> + } >> + return !ret ? 0 : -errno; >> +} >> + >> +int iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, >> + uint32_t pt_id, uint32_t *out_hwpt) >> +{ >> + int ret; >> + struct iommu_hwpt_alloc alloc_hwpt = { >> + .size = sizeof(struct iommu_hwpt_alloc), >> + .flags = 0, >> + .dev_id = dev_id, >> + .pt_id = pt_id, >> + .__reserved = 0, >> + }; >> + >> + ret = ioctl(iommufd, IOMMU_HWPT_ALLOC, &alloc_hwpt); >> + trace_iommufd_backend_alloc_hwpt(iommufd, dev_id, pt_id, >> + alloc_hwpt.out_hwpt_id, ret); >> + >> + if (ret) { >> + error_report("IOMMU_HWPT_ALLOC failed: %m"); >> + } else { >> + *out_hwpt = alloc_hwpt.out_hwpt_id; >> + } >> + return !ret ? 0 : -errno; >> +} >> + >> +static const TypeInfo iommufd_backend_info = { >> + .name = TYPE_IOMMUFD_BACKEND, >> + .parent = TYPE_OBJECT, >> + .instance_size = sizeof(IOMMUFDBackend), >> + .instance_init = iommufd_backend_init, >> + .instance_finalize = iommufd_backend_finalize, >> + .class_size = sizeof(IOMMUFDBackendClass), >> + .class_init = iommufd_backend_class_init, >> + .interfaces = (InterfaceInfo[]) { >> + { TYPE_USER_CREATABLE }, >> + { } >> + } >> +}; >> + >> +static void register_types(void) >> +{ >> + type_register_static(&iommufd_backend_info); >> +} >> + >> +type_init(register_types); >> diff --git a/backends/Kconfig b/backends/Kconfig >> index f35abc1609..2cb23f62fa 100644 >> --- a/backends/Kconfig >> +++ b/backends/Kconfig >> @@ -1 +1,5 @@ >> source tpm/Kconfig >> + >> +config IOMMUFD >> + bool >> + depends on VFIO >> diff --git a/backends/meson.build b/backends/meson.build >> index 914c7c4afb..05ac57ff15 100644 >> --- a/backends/meson.build >> +++ b/backends/meson.build >> @@ -20,6 +20,11 @@ if have_vhost_user >> system_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c')) >> endif >> system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev- >vhost.c')) >> +if have_iommufd >> + system_ss.add(files('iommufd.c')) >> +else >> + system_ss.add(files('iommufd-stub.c')) >> +endif > >replace with : > > system_ss.add(when: 'CONFIG_IOMMUFD', if_true: files('iommufd.c')) > >and drop iommufd-stub.c which will become useless. Will do. > > > >> if have_vhost_user_crypto >> system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev- >vhost-user.c')) >> endif >> diff --git a/backends/trace-events b/backends/trace-events >> index 652eb76a57..e5f828bca2 100644 >> --- a/backends/trace-events >> +++ b/backends/trace-events >> @@ -5,3 +5,15 @@ dbus_vmstate_pre_save(void) >> dbus_vmstate_post_load(int version_id) "version_id: %d" >> dbus_vmstate_loading(const char *id) "id: %s" >> dbus_vmstate_saving(const char *id) "id: %s" >> + >> +# iommufd.c >> +iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d >owned=%d users=%d (%d)" >> +iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d" >> +iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d" >> +iommufd_backend_get_ioas(int iommufd, uint32_t ioas, int ret) " >iommufd=%d ioas=%d (%d)" >> +iommufd_backend_put_ioas(int iommufd, uint32_t ioas) " iommufd=%d >ioas=%d" >> +iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, >uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d >iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)" >> +iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, >uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" >size=0x%"PRIx64" (%d)" >> +iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " >iommufd=%d ioas=%d (%d)" >> +iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d >id=%d (%d)" >> +iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, >uint32_t out_hwpt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u out_hwpt=%u >(%d)" >> diff --git a/qemu-options.hx b/qemu-options.hx >> index e26230bac5..ddfaddf8ce 100644 >> --- a/qemu-options.hx >> +++ b/qemu-options.hx >> @@ -5210,6 +5210,19 @@ SRST >> >> The ``share`` boolean option is on by default with memfd. >> >> +#ifdef CONFIG_IOMMUFD > >Please remove. Will do. Thanks Zhenzhong