Allow access to /dev/iommu and /dev/vfio/devices/vfio* when launching a QEMU
VM with the iommufd feature enabled.

Signed-off-by: Nathan Chen <nath...@nvidia.com>
---
 src/qemu/qemu_cgroup.c           | 61 ++++++++++++++++++++++++++++
 src/qemu/qemu_cgroup.h           |  1 +
 src/qemu/qemu_namespace.c        | 44 +++++++++++++++++++++
 src/security/security_apparmor.c | 11 ++++++
 src/security/security_dac.c      | 23 +++++++++++
 src/security/security_selinux.c  | 24 +++++++++++
 src/util/virpci.c                | 68 ++++++++++++++++++++++++++++++++
 src/util/virpci.h                |  1 +
 8 files changed, 233 insertions(+)

diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index f10976c2b0..73d0cb3a7a 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -462,6 +462,54 @@ qemuTeardownInputCgroup(virDomainObj *vm,
 }
 
 
+/**
+ * qemuSetupIommufdCgroup:
+ * @vm: domain object
+ *
+ * If any host device of @vm has an iommufdId set, allow read/write access
+ * in the devices cgroup to every "vfio*" node found in /dev/vfio/devices
+ * and to /dev/iommu (when it exists). Nothing is done if no host device
+ * uses iommufd, if the cgroup has no devices controller, or if
+ * /dev/vfio/devices does not exist.
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+int
+qemuSetupIommufdCgroup(virDomainObj *vm)
+{
+    qemuDomainObjPrivate *priv = vm->privateData;
+    const char *devicesDir = "/dev/vfio/devices";
+    g_autoptr(DIR) dir = NULL;
+    struct dirent *dent;
+    bool iommufd = false;
+    size_t i;
+    int rc;
+
+    for (i = 0; i < vm->def->nhostdevs; i++) {
+        if (vm->def->hostdevs[i]->iommufdId) {
+            iommufd = true;
+            break;
+        }
+    }
+
+    if (!iommufd)
+        return 0;
+
+    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
+        return 0;
+
+    /* A missing directory is not an error and must not leave one reported */
+    if ((rc = virDirOpenIfExists(&dir, devicesDir)) < 0)
+        return -1;
+
+    if (rc == 0)
+        return 0;
+
+    while ((rc = virDirRead(dir, &dent, devicesDir)) > 0) {
+        /* Scoped to the iteration so that every path is freed */
+        g_autofree char *path = NULL;
+
+        if (!STRPREFIX(dent->d_name, "vfio"))
+            continue;
+
+        path = g_strdup_printf("%s/%s", devicesDir, dent->d_name);
+
+        if (qemuCgroupAllowDevicePath(vm, path,
+                                      VIR_CGROUP_DEVICE_RW, false) < 0) {
+            return -1;
+        }
+    }
+
+    /* virDirRead() returns -1 when reading the directory failed */
+    if (rc < 0)
+        return -1;
+
+    if (virFileExists("/dev/iommu") &&
+        qemuCgroupAllowDevicePath(vm, "/dev/iommu",
+                                  VIR_CGROUP_DEVICE_RW, false) < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+
 /**
  * qemuSetupHostdevCgroup:
  * vm: domain object
@@ -760,6 +808,7 @@ qemuSetupDevicesCgroup(virDomainObj *vm)
     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
     const char *const *deviceACL = (const char *const *) cfg->cgroupDeviceACL;
     int rv = -1;
+    int iommufd = 0;
     size_t i;
 
     if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
@@ -830,6 +879,18 @@ qemuSetupDevicesCgroup(virDomainObj *vm)
             return -1;
     }
 
+    for (i = 0; i < vm->def->nhostdevs; i++) {
+        if (vm->def->hostdevs[i]->iommufdId) {
+            iommufd = 1;
+            break;
+        }
+    }
+
+    if (iommufd == 1) {
+        if (qemuSetupIommufdCgroup(vm) < 0)
+            return -1;
+    }
+
     for (i = 0; i < vm->def->nmems; i++) {
         if (qemuSetupMemoryDevicesCgroup(vm, vm->def->mems[i]) < 0)
             return -1;
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
index 3668034cde..bea677ba3c 100644
--- a/src/qemu/qemu_cgroup.h
+++ b/src/qemu/qemu_cgroup.h
@@ -42,6 +42,7 @@ int qemuSetupHostdevCgroup(virDomainObj *vm,
 int qemuTeardownHostdevCgroup(virDomainObj *vm,
                               virDomainHostdevDef *dev)
    G_GNUC_WARN_UNUSED_RESULT;
+int qemuSetupIommufdCgroup(virDomainObj *vm);
 int qemuSetupMemoryDevicesCgroup(virDomainObj *vm,
                                  virDomainMemoryDef *mem);
 int qemuTeardownMemoryDevicesCgroup(virDomainObj *vm,
diff --git a/src/qemu/qemu_namespace.c b/src/qemu/qemu_namespace.c
index f72da83929..965a304f7f 100644
--- a/src/qemu/qemu_namespace.c
+++ b/src/qemu/qemu_namespace.c
@@ -677,6 +677,47 @@ qemuDomainSetupLaunchSecurity(virDomainObj *vm,
 }
 
 
+/**
+ * qemuDomainSetupIommufd:
+ * @vm: domain object
+ * @paths: list to which newly allocated device node paths are prepended
+ *
+ * If any host device of @vm has an iommufdId set, add every "vfio*" node
+ * found in /dev/vfio/devices, plus /dev/iommu when it exists, to @paths.
+ * Nothing is added if no host device uses iommufd or if /dev/vfio/devices
+ * does not exist.
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+static int
+qemuDomainSetupIommufd(virDomainObj *vm,
+                       GSList **paths)
+{
+    const char *devicesDir = "/dev/vfio/devices";
+    g_autoptr(DIR) dir = NULL;
+    struct dirent *dent;
+    bool iommufd = false;
+    size_t i;
+    int rc;
+
+    for (i = 0; i < vm->def->nhostdevs; i++) {
+        if (vm->def->hostdevs[i]->iommufdId) {
+            iommufd = true;
+            break;
+        }
+    }
+
+    if (!iommufd)
+        return 0;
+
+    /* A missing directory is not an error and must not leave one reported */
+    if ((rc = virDirOpenIfExists(&dir, devicesDir)) < 0)
+        return -1;
+
+    if (rc == 0)
+        return 0;
+
+    while ((rc = virDirRead(dir, &dent, devicesDir)) > 0) {
+        if (!STRPREFIX(dent->d_name, "vfio"))
+            continue;
+
+        *paths = g_slist_prepend(*paths,
+                                 g_strdup_printf("%s/%s",
+                                                 devicesDir, dent->d_name));
+    }
+
+    /* virDirRead() returns -1 when reading the directory failed */
+    if (rc < 0)
+        return -1;
+
+    if (virFileExists("/dev/iommu"))
+        *paths = g_slist_prepend(*paths, g_strdup("/dev/iommu"));
+
+    return 0;
+}
+
+
 static int
 qemuNamespaceMknodPaths(virDomainObj *vm,
                         GSList *paths,
@@ -700,6 +741,9 @@ qemuDomainBuildNamespace(virQEMUDriverConfig *cfg,
     if (qemuDomainSetupAllDisks(vm, &paths) < 0)
         return -1;
 
+    if (qemuDomainSetupIommufd(vm, &paths) < 0)
+        return -1;
+
     if (qemuDomainSetupAllHostdevs(vm, &paths) < 0)
         return -1;
 
diff --git a/src/security/security_apparmor.c b/src/security/security_apparmor.c
index 68ac39611f..73dc750c94 100644
--- a/src/security/security_apparmor.c
+++ b/src/security/security_apparmor.c
@@ -856,6 +856,17 @@ AppArmorSetSecurityHostdevLabel(virSecurityManager *mgr,
             }
             ret = AppArmorSetSecurityPCILabel(pci, vfioGroupDev, ptr);
             VIR_FREE(vfioGroupDev);
+
+            if (dev->iommufdId) {
+                g_autofree char *vfiofdDev = virPCIDeviceGetIOMMUFDDev(pci);
+                if (vfiofdDev) {
+                    int ret2 = AppArmorSetSecurityPCILabel(pci, vfiofdDev, 
ptr);
+                    if (ret2 < 0)
+                        ret = ret2;
+                } else {
+                    return -1;
+                }
+            }
         } else {
             ret = virPCIDeviceFileIterate(pci, AppArmorSetSecurityPCILabel, 
ptr);
         }
diff --git a/src/security/security_dac.c b/src/security/security_dac.c
index 2f788b872a..327e36466d 100644
--- a/src/security/security_dac.c
+++ b/src/security/security_dac.c
@@ -1290,6 +1290,18 @@ virSecurityDACSetHostdevLabel(virSecurityManager *mgr,
             ret = virSecurityDACSetHostdevLabelHelper(vfioGroupDev,
                                                       false,
                                                       &cbdata);
+            if (dev->iommufdId) {
+                g_autofree char *vfiofdDev = virPCIDeviceGetIOMMUFDDev(pci);
+                if (vfiofdDev) {
+                    int ret2 = virSecurityDACSetHostdevLabelHelper(vfiofdDev,
+                                                                   false,
+                                                                   &cbdata);
+                    if (ret2 < 0)
+                        ret = ret2;
+                } else {
+                    return -1;
+                }
+            }
         } else {
             ret = virPCIDeviceFileIterate(pci,
                                           virSecurityDACSetPCILabel,
@@ -1450,6 +1462,17 @@ virSecurityDACRestoreHostdevLabel(virSecurityManager 
*mgr,
 
             ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL,
                                                          vfioGroupDev, false);
+            if (dev->iommufdId) {
+                g_autofree char *vfiofdDev = virPCIDeviceGetIOMMUFDDev(pci);
+                if (vfiofdDev) {
+                    int ret2 = virSecurityDACRestoreFileLabelInternal(mgr, 
NULL,
+                                                                      
vfiofdDev, false);
+                    if (ret2 < 0)
+                        ret = ret2;
+                } else {
+                    return -1;
+                }
+            }
         } else {
             ret = virPCIDeviceFileIterate(pci, virSecurityDACRestorePCILabel, 
mgr);
         }
diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c
index fa5d1568eb..60dcadd839 100644
--- a/src/security/security_selinux.c
+++ b/src/security/security_selinux.c
@@ -2248,6 +2248,19 @@ 
virSecuritySELinuxSetHostdevSubsysLabel(virSecurityManager *mgr,
             ret = virSecuritySELinuxSetHostdevLabelHelper(vfioGroupDev,
                                                           false,
                                                           &data);
+            if (dev->iommufdId) {
+                g_autofree char *vfiofdDev = virPCIDeviceGetIOMMUFDDev(pci);
+                if (vfiofdDev) {
+                    int ret2 = 
virSecuritySELinuxSetHostdevLabelHelper(vfiofdDev,
+                                                                       false,
+                                                                       &data);
+                    if (ret2 < 0)
+                        ret = ret2;
+                } else {
+                    return -1;
+                }
+            }
+
         } else {
             ret = virPCIDeviceFileIterate(pci, virSecuritySELinuxSetPCILabel, 
&data);
         }
@@ -2481,6 +2494,17 @@ 
virSecuritySELinuxRestoreHostdevSubsysLabel(virSecurityManager *mgr,
                 return -1;
 
             ret = virSecuritySELinuxRestoreFileLabel(mgr, vfioGroupDev, false);
+
+            if (dev->iommufdId) {
+                g_autofree char *vfiofdDev = virPCIDeviceGetIOMMUFDDev(pci);
+                if (vfiofdDev) {
+                    int ret2 = virSecuritySELinuxRestoreFileLabel(mgr, 
vfiofdDev, false);
+                    if (ret2 < 0)
+                        ret = ret2;
+                } else {
+                    return -1;
+                }
+            }
         } else {
             ret = virPCIDeviceFileIterate(pci, 
virSecuritySELinuxRestorePCILabel, mgr);
         }
diff --git a/src/util/virpci.c b/src/util/virpci.c
index 90617e69c6..6e6e5e47c0 100644
--- a/src/util/virpci.c
+++ b/src/util/virpci.c
@@ -2478,6 +2478,74 @@ virPCIDeviceGetIOMMUGroupDev(virPCIDevice *dev)
     return g_strdup_printf("/dev/vfio/%s", groupFile);
 }
 
+/**
+ * virPCIDeviceGetIOMMUFDDev:
+ * @dev: PCI device
+ *
+ * Find the VFIO character device node of @dev (e.g.
+ * "/dev/vfio/devices/vfio15"). The "vfioN" name is looked up first in the
+ * device's own sysfs vfio-dev directory and, failing that, by scanning
+ * /sys/class/vfio-dev for an entry whose "device" link resolves to @dev.
+ *
+ * Returns: newly allocated path which the caller must free, or NULL if
+ * the node could not be found or a lookup failed
+ */
+char *
+virPCIDeviceGetIOMMUFDDev(virPCIDevice *dev)
+{
+    const char *classPath = "/sys/class/vfio-dev";
+    const char *pciAddr = virPCIDeviceGetName(dev);
+    g_autofree char *devPath = NULL;
+    g_autofree char *vfiodev = NULL;
+    /* One handle per directory so that both are closed on every return */
+    g_autoptr(DIR) devDir = NULL;
+    g_autoptr(DIR) classDir = NULL;
+    struct dirent *entry;
+    int rc;
+
+    if (!pciAddr)
+        return NULL;
+
+    /* First try: look in PCI device's vfio-dev subdirectory */
+    devPath = g_strdup_printf("/sys/bus/pci/devices/%s/vfio-dev", pciAddr);
+
+    /* A missing directory is expected here; only other failures are fatal */
+    if ((rc = virDirOpenIfExists(&devDir, devPath)) < 0)
+        return NULL;
+
+    if (rc > 0) {
+        while ((rc = virDirRead(devDir, &entry, devPath)) > 0) {
+            if (!g_str_has_prefix(entry->d_name, "vfio"))
+                continue;
+
+            vfiodev = g_strdup_printf("/dev/vfio/devices/%s", entry->d_name);
+            break;
+        }
+
+        if (rc < 0)
+            return NULL;
+    }
+
+    /* Second try: scan /sys/class/vfio-dev for matching device */
+    if (!vfiodev) {
+        if ((rc = virDirOpenIfExists(&classDir, classPath)) < 0)
+            return NULL;
+
+        if (rc > 0) {
+            while ((rc = virDirRead(classDir, &entry, classPath)) > 0) {
+                g_autofree char *devLink = NULL;
+                g_autofree char *target = NULL;
+                g_autofree char *targetName = NULL;
+
+                if (!g_str_has_prefix(entry->d_name, "vfio"))
+                    continue;
+
+                devLink = g_strdup_printf("%s/%s/device",
+                                          classPath, entry->d_name);
+
+                if (virFileResolveLink(devLink, &target) < 0)
+                    continue;
+
+                /* Compare only the last path component: a substring match
+                 * would also hit devices that merely sit below a bridge
+                 * with this address. */
+                targetName = g_path_get_basename(target);
+
+                if (STREQ(targetName, pciAddr)) {
+                    vfiodev = g_strdup_printf("/dev/vfio/devices/%s",
+                                              entry->d_name);
+                    break;
+                }
+            }
+
+            if (rc < 0)
+                return NULL;
+        }
+    }
+
+    /* Callers treat NULL as failure, so make sure an error is reported when
+     * no name was found or the device node does not exist. */
+    if (!vfiodev || !virFileExists(vfiodev)) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("no VFIO character device found for PCI device %1$s"),
+                       pciAddr);
+        return NULL;
+    }
+
+    return g_steal_pointer(&vfiodev);
+}
+
 static int
 virPCIDeviceDownstreamLacksACS(virPCIDevice *dev)
 {
diff --git a/src/util/virpci.h b/src/util/virpci.h
index fc538566e1..996ffab2f9 100644
--- a/src/util/virpci.h
+++ b/src/util/virpci.h
@@ -203,6 +203,7 @@ int virPCIDeviceAddressGetIOMMUGroupNum(virPCIDeviceAddress 
*addr);
 char *virPCIDeviceAddressGetIOMMUGroupDev(const virPCIDeviceAddress *devAddr);
 bool virPCIDeviceExists(const virPCIDeviceAddress *addr);
 char *virPCIDeviceGetIOMMUGroupDev(virPCIDevice *dev);
+char *virPCIDeviceGetIOMMUFDDev(virPCIDevice *dev);
 
 int virPCIDeviceIsAssignable(virPCIDevice *dev,
                              int strict_acs_check);
-- 
2.43.0

Reply via email to