Allow access to /dev/iommu and /dev/vfio/devices/vfio* when launching a QEMU VM with the iommufd feature enabled.
Signed-off-by: Nathan Chen <nath...@nvidia.com>
---
 src/qemu/qemu_cgroup.c           | 68 +++++++++++++++++++++++
 src/qemu/qemu_cgroup.h           |  1 +
 src/qemu/qemu_namespace.c        | 54 ++++++++++++++++++
 src/security/security_apparmor.c | 11 ++++
 src/security/security_dac.c      | 25 +++++++++
 src/security/security_selinux.c  | 23 ++++++++
 src/util/virpci.c                | 95 ++++++++++++++++++++++++++++++++
 src/util/virpci.h                |  1 +
 8 files changed, 278 insertions(+)

diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index f10976c2b0..73d0cb3a7a 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -462,6 +462,71 @@ qemuTeardownInputCgroup(virDomainObj *vm,
 }
 
 
+/**
+ * qemuSetupIommufdCgroup:
+ * @vm: domain object
+ *
+ * If any host device of @vm uses iommufd, allow the domain read/write
+ * access to /dev/iommu (when it exists) and to every vfio* node found
+ * in /dev/vfio/devices.
+ *
+ * NOTE(review): this exposes all VFIO character devices present on the
+ * host, not only the ones assigned to @vm; consider narrowing it.
+ *
+ * Returns: 0 on success or when there is nothing to do, -1 on failure
+ */
+int
+qemuSetupIommufdCgroup(virDomainObj *vm)
+{
+    qemuDomainObjPrivate *priv = vm->privateData;
+    const char *vfioDevDir = "/dev/vfio/devices";
+    g_autoptr(DIR) dir = NULL;
+    struct dirent *dent;
+    bool iommufd = false;
+    size_t i;
+    int rc;
+
+    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
+        return 0;
+
+    for (i = 0; i < vm->def->nhostdevs; i++) {
+        if (vm->def->hostdevs[i]->iommufdId) {
+            iommufd = true;
+            break;
+        }
+    }
+
+    if (!iommufd)
+        return 0;
+
+    if (virFileExists("/dev/iommu") &&
+        qemuCgroupAllowDevicePath(vm, "/dev/iommu",
+                                  VIR_CGROUP_DEVICE_RW, false) < 0) {
+        return -1;
+    }
+
+    /* A missing directory only means there are no VFIO cdevs to allow */
+    if ((rc = virDirOpenIfExists(&dir, vfioDevDir)) <= 0)
+        return rc;
+
+    while ((rc = virDirRead(dir, &dent, vfioDevDir)) > 0) {
+        g_autofree char *path = NULL;
+
+        if (!STRPREFIX(dent->d_name, "vfio"))
+            continue;
+
+        path = g_strdup_printf("%s/%s", vfioDevDir, dent->d_name);
+
+        if (qemuCgroupAllowDevicePath(vm, path,
+                                      VIR_CGROUP_DEVICE_RW, false) < 0) {
+            return -1;
+        }
+    }
+
+    return rc < 0 ? -1 : 0;
+}
+
+
 /**
  * qemuSetupHostdevCgroup:
  * vm: domain object
@@ -830,6 +895,9 @@ qemuSetupDevicesCgroup(virDomainObj *vm)
             return -1;
     }
 
+    if (qemuSetupIommufdCgroup(vm) < 0)
+        return -1;
+
     for (i = 0; i < vm->def->nmems; i++) {
         if (qemuSetupMemoryDevicesCgroup(vm, vm->def->mems[i]) < 0)
             return -1;
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
index 3668034cde..bea677ba3c 100644
--- a/src/qemu/qemu_cgroup.h
+++ b/src/qemu/qemu_cgroup.h
@@ -42,6 +42,7 @@ int qemuSetupHostdevCgroup(virDomainObj *vm,
 int qemuTeardownHostdevCgroup(virDomainObj *vm,
                               virDomainHostdevDef *dev)
    G_GNUC_WARN_UNUSED_RESULT;
+int qemuSetupIommufdCgroup(virDomainObj *vm);
 int qemuSetupMemoryDevicesCgroup(virDomainObj *vm,
                                  virDomainMemoryDef *mem);
 int qemuTeardownMemoryDevicesCgroup(virDomainObj *vm,
diff --git a/src/qemu/qemu_namespace.c b/src/qemu/qemu_namespace.c
index f72da83929..965a304f7f 100644
--- a/src/qemu/qemu_namespace.c
+++ b/src/qemu/qemu_namespace.c
@@ -677,6 +677,57 @@ qemuDomainSetupLaunchSecurity(virDomainObj *vm,
 }
 
 
+/**
+ * qemuDomainSetupIommufd:
+ * @vm: domain object
+ * @paths: list of device paths being collected by the caller
+ *
+ * If any host device of @vm uses iommufd, prepend /dev/iommu (when it
+ * exists) and every vfio* node found in /dev/vfio/devices to @paths.
+ *
+ * Returns: 0 on success or when there is nothing to do, -1 on failure
+ */
+static int
+qemuDomainSetupIommufd(virDomainObj *vm,
+                       GSList **paths)
+{
+    const char *vfioDevDir = "/dev/vfio/devices";
+    g_autoptr(DIR) dir = NULL;
+    struct dirent *dent;
+    bool iommufd = false;
+    size_t i;
+    int rc;
+
+    for (i = 0; i < vm->def->nhostdevs; i++) {
+        if (vm->def->hostdevs[i]->iommufdId) {
+            iommufd = true;
+            break;
+        }
+    }
+
+    if (!iommufd)
+        return 0;
+
+    if (virFileExists("/dev/iommu"))
+        *paths = g_slist_prepend(*paths, g_strdup("/dev/iommu"));
+
+    /* A missing directory only means there are no VFIO cdevs to add */
+    if ((rc = virDirOpenIfExists(&dir, vfioDevDir)) <= 0)
+        return rc;
+
+    while ((rc = virDirRead(dir, &dent, vfioDevDir)) > 0) {
+        if (!STRPREFIX(dent->d_name, "vfio"))
+            continue;
+
+        *paths = g_slist_prepend(*paths,
+                                 g_strdup_printf("%s/%s", vfioDevDir,
+                                                 dent->d_name));
+    }
+
+    return rc < 0 ? -1 : 0;
+}
+
+
 static int
 qemuNamespaceMknodPaths(virDomainObj *vm,
                         GSList *paths,
@@ -700,6 +751,9 @@ qemuDomainBuildNamespace(virQEMUDriverConfig *cfg,
     if (qemuDomainSetupAllDisks(vm, &paths) < 0)
         return -1;
 
+    if (qemuDomainSetupIommufd(vm, &paths) < 0)
+        return -1;
+
     if (qemuDomainSetupAllHostdevs(vm, &paths) < 0)
         return -1;
 
diff --git a/src/security/security_apparmor.c b/src/security/security_apparmor.c
index 68ac39611f..73dc750c94 100644
--- a/src/security/security_apparmor.c
+++ b/src/security/security_apparmor.c
@@ -856,6 +856,17 @@ AppArmorSetSecurityHostdevLabel(virSecurityManager *mgr,
             }
             ret = AppArmorSetSecurityPCILabel(pci, vfioGroupDev, ptr);
             VIR_FREE(vfioGroupDev);
+
+            if (dev->iommufdId) {
+                g_autofree char *vfiofdDev = virPCIDeviceGetIOMMUFDDev(pci);
+
+                /* Fail through 'ret' so the function's regular exit path
+                 * still runs instead of being skipped by an early return */
+                if (!vfiofdDev ||
+                    AppArmorSetSecurityPCILabel(pci, vfiofdDev, ptr) < 0) {
+                    ret = -1;
+                }
+            }
         } else {
             ret = virPCIDeviceFileIterate(pci, AppArmorSetSecurityPCILabel, ptr);
         }
diff --git a/src/security/security_dac.c b/src/security/security_dac.c
index 2f788b872a..327e36466d 100644
--- a/src/security/security_dac.c
+++ b/src/security/security_dac.c
@@ -1290,6 +1290,19 @@ virSecurityDACSetHostdevLabel(virSecurityManager *mgr,
             ret = virSecurityDACSetHostdevLabelHelper(vfioGroupDev,
                                                       false,
                                                       &cbdata);
+
+            if (dev->iommufdId) {
+                g_autofree char *vfiofdDev = virPCIDeviceGetIOMMUFDDev(pci);
+
+                if (!vfiofdDev)
+                    return -1;
+
+                if (virSecurityDACSetHostdevLabelHelper(vfiofdDev,
+                                                        false,
+                                                        &cbdata) < 0) {
+                    ret = -1;
+                }
+            }
         } else {
             ret = virPCIDeviceFileIterate(pci,
                                           virSecurityDACSetPCILabel,
@@ -1450,6 +1463,18 @@ virSecurityDACRestoreHostdevLabel(virSecurityManager *mgr,
 
             ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL,
                                                          vfioGroupDev, false);
+
+            if (dev->iommufdId) {
+                g_autofree char *vfiofdDev = virPCIDeviceGetIOMMUFDDev(pci);
+
+                if (!vfiofdDev)
+                    return -1;
+
+                if (virSecurityDACRestoreFileLabelInternal(mgr, NULL,
+                                                           vfiofdDev, false) < 0) {
+                    ret = -1;
+                }
+            }
         } else {
             ret = virPCIDeviceFileIterate(pci, virSecurityDACRestorePCILabel, mgr);
         }
diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c
index fa5d1568eb..60dcadd839 100644
--- a/src/security/security_selinux.c
+++ b/src/security/security_selinux.c
@@ -2248,6 +2248,19 @@ virSecuritySELinuxSetHostdevSubsysLabel(virSecurityManager *mgr,
             ret = virSecuritySELinuxSetHostdevLabelHelper(vfioGroupDev,
                                                           false,
                                                           &data);
+
+            if (dev->iommufdId) {
+                g_autofree char *vfiofdDev = virPCIDeviceGetIOMMUFDDev(pci);
+
+                if (!vfiofdDev)
+                    return -1;
+
+                if (virSecuritySELinuxSetHostdevLabelHelper(vfiofdDev,
+                                                            false,
+                                                            &data) < 0) {
+                    ret = -1;
+                }
+            }
         } else {
             ret = virPCIDeviceFileIterate(pci, virSecuritySELinuxSetPCILabel, &data);
         }
@@ -2481,6 +2494,16 @@ virSecuritySELinuxRestoreHostdevSubsysLabel(virSecurityManager *mgr,
                 return -1;
 
             ret = virSecuritySELinuxRestoreFileLabel(mgr, vfioGroupDev, false);
+
+            if (dev->iommufdId) {
+                g_autofree char *vfiofdDev = virPCIDeviceGetIOMMUFDDev(pci);
+
+                if (!vfiofdDev)
+                    return -1;
+
+                if (virSecuritySELinuxRestoreFileLabel(mgr, vfiofdDev, false) < 0)
+                    ret = -1;
+            }
         } else {
             ret = virPCIDeviceFileIterate(pci, virSecuritySELinuxRestorePCILabel, mgr);
         }
diff --git a/src/util/virpci.c b/src/util/virpci.c
index 90617e69c6..6e6e5e47c0 100644
--- a/src/util/virpci.c
+++ b/src/util/virpci.c
@@ -2478,6 +2478,101 @@ virPCIDeviceGetIOMMUGroupDev(virPCIDevice *dev)
     return g_strdup_printf("/dev/vfio/%s", groupFile);
 }
 
+/* virPCIDeviceFindVFIODevName - scan @dirpath for an entry whose name
+ * starts with "vfio". When @pciAddr is non-NULL the entry must also have
+ * a "device" link that resolves to a path whose last component is
+ * @pciAddr.
+ *
+ * Returns 1 and stores a copy of the entry name in @name on a match,
+ * 0 if @dirpath does not exist or holds no match, -1 on error.
+ */
+static int
+virPCIDeviceFindVFIODevName(const char *dirpath,
+                            const char *pciAddr,
+                            char **name)
+{
+    g_autoptr(DIR) dir = NULL;
+    struct dirent *entry;
+    int rc;
+
+    if ((rc = virDirOpenIfExists(&dir, dirpath)) <= 0)
+        return rc;
+
+    while ((rc = virDirRead(dir, &entry, dirpath)) > 0) {
+        if (!STRPREFIX(entry->d_name, "vfio"))
+            continue;
+
+        if (pciAddr) {
+            g_autofree char *devLink = NULL;
+            g_autofree char *target = NULL;
+            g_autofree char *targetName = NULL;
+
+            devLink = g_strdup_printf("%s/%s/device", dirpath, entry->d_name);
+
+            if (virFileResolveLink(devLink, &target) < 0)
+                continue;
+
+            /* Match the last component only: a substring search would
+             * also accept devices located behind @pciAddr, because their
+             * resolved path contains the address of every parent. */
+            targetName = g_path_get_basename(target);
+            if (STRNEQ(targetName, pciAddr))
+                continue;
+        }
+
+        *name = g_strdup(entry->d_name);
+        return 1;
+    }
+
+    return rc;
+}
+
+/* virPCIDeviceGetIOMMUFDDev - return the path of the VFIO character
+ * device that belongs to this PCI device
+ * (e.g. "/dev/vfio/devices/vfio15").
+ *
+ * Returns a newly allocated string the caller must free, or NULL on
+ * failure.
+ */
+char *
+virPCIDeviceGetIOMMUFDDev(virPCIDevice *dev)
+{
+    const char *pciAddr = virPCIDeviceGetName(dev);
+    g_autofree char *sysfsDir = NULL;
+    g_autofree char *vfioName = NULL;
+    g_autofree char *vfioDev = NULL;
+    int rc;
+
+    if (!pciAddr)
+        return NULL;
+
+    /* First try: look in the PCI device's vfio-dev subdirectory */
+    sysfsDir = g_strdup_printf("/sys/bus/pci/devices/%s/vfio-dev", pciAddr);
+    rc = virPCIDeviceFindVFIODevName(sysfsDir, NULL, &vfioName);
+
+    /* Second try: scan /sys/class/vfio-dev for a matching device */
+    if (rc == 0) {
+        rc = virPCIDeviceFindVFIODevName("/sys/class/vfio-dev",
+                                         pciAddr, &vfioName);
+    }
+
+    if (rc < 0)
+        return NULL;
+
+    if (rc > 0)
+        vfioDev = g_strdup_printf("/dev/vfio/devices/%s", vfioName);
+
+    /* Only hand out the path if the device node really exists */
+    if (!vfioDev || !virFileExists(vfioDev)) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("No VFIO character device found for PCI device %1$s"),
+                       pciAddr);
+        return NULL;
+    }
+
+    return g_steal_pointer(&vfioDev);
+}
+
 static int
 virPCIDeviceDownstreamLacksACS(virPCIDevice *dev)
 {
diff --git a/src/util/virpci.h b/src/util/virpci.h
index fc538566e1..996ffab2f9 100644
--- a/src/util/virpci.h
+++ b/src/util/virpci.h
@@ -203,6 +203,7 @@ int virPCIDeviceAddressGetIOMMUGroupNum(virPCIDeviceAddress *addr);
 char *virPCIDeviceAddressGetIOMMUGroupDev(const virPCIDeviceAddress *devAddr);
 bool virPCIDeviceExists(const virPCIDeviceAddress *addr);
 char *virPCIDeviceGetIOMMUGroupDev(virPCIDevice *dev);
+char *virPCIDeviceGetIOMMUFDDev(virPCIDevice *dev);
 int virPCIDeviceIsAssignable(virPCIDevice *dev,
                              int strict_acs_check);
 
-- 
2.43.0