Hi Cédric, On 12/21/23 17:00, Cédric Le Goater wrote: > [ ... ] > > >> +static void iommufd_backend_init(Object *obj) >> +{ >> + IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); >> + >> + be->fd = -1; >> + be->users = 0; >> + be->owned = true; >> + qemu_mutex_init(&be->lock);> +} >> + >> +static void iommufd_backend_finalize(Object *obj) >> +{ >> + IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); >> + >> + if (be->owned) { >> + close(be->fd); >> + be->fd = -1; >> + } >> +} >> + >> +static void iommufd_backend_set_fd(Object *obj, const char *str, >> Error **errp) >> +{ >> + IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); >> + int fd = -1; >> + >> + fd = monitor_fd_param(monitor_cur(), str, errp); >> + if (fd == -1) { >> + error_prepend(errp, "Could not parse remote object fd %s:", >> str); >> + return; >> + } >> + qemu_mutex_lock(&be->lock); >> + be->fd = fd; >> + be->owned = false; >> + qemu_mutex_unlock(&be->lock); >> + trace_iommu_backend_set_fd(be->fd); >> +} >> + >> +static bool iommufd_backend_can_be_deleted(UserCreatable *uc) >> +{ >> + IOMMUFDBackend *be = IOMMUFD_BACKEND(uc); >> + >> + return !be->users; > > Coverity CID 1531549 reports a concurrent data access violation because > be->users is being accessed without holding the mutex. > > I wonder how useful is this mutex anyhow, since the code paths should > be protected by the BQL lock. If you agree, I will send an update to > simply drop be->lock and solve this report. I am not totally comfortable with the fact BQL covers the same protection? Please can you elaborate.
Thanks Eric > > Thanks, > > C. > > > >> +} >> + >> +static void iommufd_backend_class_init(ObjectClass *oc, void *data) >> +{ >> + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); >> + >> + ucc->can_be_deleted = iommufd_backend_can_be_deleted; >> + >> + object_class_property_add_str(oc, "fd", NULL, >> iommufd_backend_set_fd); >> +} >> + >> +int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) >> +{ >> + int fd, ret = 0; >> + >> + qemu_mutex_lock(&be->lock); >> + if (be->users == UINT32_MAX) { >> + error_setg(errp, "too many connections"); >> + ret = -E2BIG; >> + goto out; >> + } >> + if (be->owned && !be->users) { >> + fd = qemu_open_old("/dev/iommu", O_RDWR); >> + if (fd < 0) { >> + error_setg_errno(errp, errno, "/dev/iommu opening failed"); >> + ret = fd; >> + goto out; >> + } >> + be->fd = fd; >> + } >> + be->users++; >> +out: >> + trace_iommufd_backend_connect(be->fd, be->owned, >> + be->users, ret); >> + qemu_mutex_unlock(&be->lock); >> + return ret; >> +} >> + >> +void iommufd_backend_disconnect(IOMMUFDBackend *be) >> +{ >> + qemu_mutex_lock(&be->lock); >> + if (!be->users) { >> + goto out; >> + } >> + be->users--; >> + if (!be->users && be->owned) { >> + close(be->fd); >> + be->fd = -1; >> + } >> +out: >> + trace_iommufd_backend_disconnect(be->fd, be->users); >> + qemu_mutex_unlock(&be->lock); >> +} >> + >> +int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, >> + Error **errp) >> +{ >> + int ret, fd = be->fd; >> + struct iommu_ioas_alloc alloc_data = { >> + .size = sizeof(alloc_data), >> + .flags = 0, >> + }; >> + >> + ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data); >> + if (ret) { >> + error_setg_errno(errp, errno, "Failed to allocate ioas"); >> + return ret; >> + } >> + >> + *ioas_id = alloc_data.out_ioas_id; >> + trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret); >> + >> + return ret; >> +} >> + >> +void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id) >> +{ >> + int ret, fd = be->fd; >> + struct iommu_destroy des = { >> + .size = sizeof(des), >> + .id = id, >> + }; >> + >> + ret = ioctl(fd, IOMMU_DESTROY, &des); >> + trace_iommufd_backend_free_id(fd, id, ret); >> + if (ret) { >> + error_report("Failed to free id: %u %m", id); >> + } >> +} >> + >> +int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, >> hwaddr iova, >> + ram_addr_t size, void *vaddr, bool >> readonly) >> +{ >> + int ret, fd = be->fd; >> + struct iommu_ioas_map map = { >> + .size = sizeof(map), >> + .flags = IOMMU_IOAS_MAP_READABLE | >> + IOMMU_IOAS_MAP_FIXED_IOVA, >> + .ioas_id = ioas_id, >> + .__reserved = 0, >> + .user_va = (uintptr_t)vaddr, >> + .iova = iova, >> + .length = size, >> + }; >> + >> + if (!readonly) { >> + map.flags |= IOMMU_IOAS_MAP_WRITEABLE; >> + } >> + >> + ret = ioctl(fd, IOMMU_IOAS_MAP, &map); >> + trace_iommufd_backend_map_dma(fd, ioas_id, iova, size, >> + vaddr, readonly, ret); >> + if (ret) { >> + ret = -errno; >> + >> + /* TODO: Not support mapping hardware PCI BAR region for >> now. */ >> + if (errno == EFAULT) { >> + warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?"); >> + } else { >> + error_report("IOMMU_IOAS_MAP failed: %m"); >> + } >> + } >> + return ret; >> +} >> + >> +int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, >> + hwaddr iova, ram_addr_t size) >> +{ >> + int ret, fd = be->fd; >> + struct iommu_ioas_unmap unmap = { >> + .size = sizeof(unmap), >> + .ioas_id = ioas_id, >> + .iova = iova, >> + .length = size, >> + }; >> + >> + ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap); >> + /* >> + * IOMMUFD takes mapping as some kind of object, unmapping >> + * nonexistent mapping is treated as deleting a nonexistent >> + * object and return ENOENT. This is different from legacy >> + * backend which allows it. vIOMMU may trigger a lot of >> + * redundant unmapping, to avoid flush the log, treat them >> + * as succeess for IOMMUFD just like legacy backend. >> + */ >> + if (ret && errno == ENOENT) { >> + trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, >> size, ret); >> + ret = 0; >> + } else { >> + trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret); >> + } >> + >> + if (ret) { >> + ret = -errno; >> + error_report("IOMMU_IOAS_UNMAP failed: %m"); >> + } >> + return ret; >> +} >> + >> +static const TypeInfo iommufd_backend_info = { >> + .name = TYPE_IOMMUFD_BACKEND, >> + .parent = TYPE_OBJECT, >> + .instance_size = sizeof(IOMMUFDBackend), >> + .instance_init = iommufd_backend_init, >> + .instance_finalize = iommufd_backend_finalize, >> + .class_size = sizeof(IOMMUFDBackendClass), >> + .class_init = iommufd_backend_class_init, >> + .interfaces = (InterfaceInfo[]) { >> + { TYPE_USER_CREATABLE }, >> + { } >> + } >> +}; >> + >> +static void register_types(void) >> +{ >> + type_register_static(&iommufd_backend_info); >> +} >> + >> +type_init(register_types); >> diff --git a/backends/Kconfig b/backends/Kconfig >> index >> f35abc16092808b1fe5b033a346908e2d66bff0b..2cb23f62fa1526cedafedcc99a032e098075b846 >> 100644 >> --- a/backends/Kconfig >> +++ b/backends/Kconfig >> @@ -1 +1,5 @@ >> source tpm/Kconfig >> + >> +config IOMMUFD >> + bool >> + depends on VFIO >> diff --git a/backends/meson.build b/backends/meson.build >> index >> 914c7c4afb905cfe710ad23dd1ee42907f6d1679..9a5cea480d172d50a641e4d9179093e8155f2db1 >> 100644 >> --- a/backends/meson.build >> +++ b/backends/meson.build >> @@ -20,6 +20,7 @@ if have_vhost_user >> system_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c')) >> endif >> system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: >> files('cryptodev-vhost.c')) >> +system_ss.add(when: 'CONFIG_IOMMUFD', if_true: files('iommufd.c')) >> if have_vhost_user_crypto >> system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: >> files('cryptodev-vhost-user.c')) >> endif >> diff --git a/backends/trace-events b/backends/trace-events >> index >> 652eb76a5723e2053fe97338c481309c58284d6a..d45c6e31a67ed66d94787f60eb08a525cf6ff68b >> 100644 >> --- a/backends/trace-events >> +++ b/backends/trace-events >> @@ -5,3 +5,13 @@ dbus_vmstate_pre_save(void) >> dbus_vmstate_post_load(int version_id) "version_id: %d" >> dbus_vmstate_loading(const char *id) "id: %s" >> dbus_vmstate_saving(const char *id) "id: %s" >> + >> +# iommufd.c >> +iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) >> "fd=%d owned=%d users=%d (%d)" >> +iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d" >> +iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d" >> +iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, >> uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d >> ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)" >> +iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, >> uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: >> iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" >> +iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, >> uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" >> size=0x%"PRIx64" (%d)" >> +iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " >> iommufd=%d ioas=%d (%d)" >> +iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " >> iommufd=%d id=%d (%d)" >> diff --git a/qemu-options.hx b/qemu-options.hx >> index >> 42fd09e4de96e962cd5873c49501f6e1dbb5e346..5fe8ea57d2d2f9390a976ef2fefe86463e888bb1 >> 100644 >> --- a/qemu-options.hx >> +++ b/qemu-options.hx >> @@ -5224,6 +5224,18 @@ SRST >> The ``share`` boolean option is on by default with memfd. >> + ``-object iommufd,id=id[,fd=fd]`` >> + Creates an iommufd backend which allows control of DMA mapping >> + through the ``/dev/iommu`` device. >> + >> + The ``id`` parameter is a unique ID which frontends (such as >> + vfio-pci of vdpa) will use to connect with the iommufd backend. >> + >> + The ``fd`` parameter is an optional pre-opened file descriptor >> + resulting from ``/dev/iommu`` opening. Usually the iommufd >> is shared >> + across all subsystems, bringing the benefit of centralized >> + reference counting. >> + >> ``-object rng-builtin,id=id`` >> Creates a random number generator backend which obtains >> entropy >> from QEMU builtin functions. The ``id`` parameter is a >> unique ID >