On a Thursday in 2025, Nathan Chen wrote:


On 11/6/2025 11:19 AM, Ján Tomko wrote:
Open iommufd FDs from libvirt backend without
exposing these FDs to XML users, i.e. one per
domain for /dev/iommu and one per iommufd
hostdev for /dev/vfio/devices/vfioX, and pass
the FD to qemu command line.


The part formatting the object and the part formatting the device
should be split.


Sounds good, I will split it into two commits. >> Signed-off-by: Nathan Chen <[email protected]>
---
src/qemu/qemu_command.c |  43 +++++++-
src/qemu/qemu_command.h |   3 +-
src/qemu/qemu_domain.c  |   8 ++
src/qemu/qemu_domain.h  |   7 ++
src/qemu/qemu_hotplug.c |   2 +-
src/qemu/qemu_process.c | 232 ++++++++++++++++++++++++++++++++++++++++
6 files changed, 289 insertions(+), 6 deletions(-)

diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 8fd7527645..740a6970f2 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -4730,7 +4730,8 @@ qemuBuildVideoCommandLine(virCommand *cmd,

virJSONValue *
qemuBuildPCIHostdevDevProps(const virDomainDef *def,
-                            virDomainHostdevDef *dev)
+                            virDomainHostdevDef *dev,
+                            virDomainObj *vm)

Hmm, perhaps exposing the iommufd object in the XML would save us from
having to pass this.


We are passing virDomainObj to this function in order to retrieve the FD number, would you mind clarifying how we could avoid passing this by exposing the iommufd object in the XML? It is my understanding that exposing the iommufd object ID would still mean we need pass the virDomainObj.


If it was a separate device, it would have its own data type and own
formatting function unrelated to hostdevs.

{
    g_autoptr(virJSONValue) props = NULL;
    virDomainHostdevSubsysPCI *pcisrc = &dev->source.subsys.u.pci;
@@ -4741,6 +4742,13 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
    const char *iommufdId = NULL;
    /* 'ramfb' property must be omitted unless it's to be enabled */
    bool ramfb = pcisrc->ramfb == VIR_TRISTATE_SWITCH_ON;
+    bool useIommufd = false;
+    qemuDomainObjPrivate *priv = vm ? vm->privateData : NULL;
+
+    if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO &&
+        pcisrc->driver.iommufd) {
+        useIommufd = true;
+    }

    /* caller has to assign proper passthrough driver name */
    switch (pcisrc->driver.name) {
@@ -4787,6 +4795,18 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
                              NULL) < 0)
        return NULL;

+    if (useIommufd && priv) {
+        g_autofree char *vfioFdName = g_strdup_printf("vfio-%04x: %02x:%02x.%d", +                                                      pcisrc-
addr.domain, pcisrc->addr.bus,
+                                                      pcisrc-
addr.slot, pcisrc->addr.function);
+

There's no need to duplicate the list of hostdevs which use iommufd in a
per-domain hash table.

For storing per-device file descriptors, we have per-device private
data.

+        int vfiofd = GPOINTER_TO_INT(g_hash_table_lookup(priv-
vfioDeviceFds, vfioFdName));
+        if (virJSONValueObjectAdd(&props,
+                                  "S:fd", g_strdup_printf("%d", vfiofd),
+                                  NULL) < 0)
+            return NULL;
+    }
+
    if (qemuBuildDeviceAddressProps(props, def, dev->info) < 0)
        return NULL;

@@ -5197,12 +5217,14 @@ qemuBuildAcpiNodesetProps(virCommand *cmd,
static int
qemuBuildHostdevCommandLine(virCommand *cmd,
                            const virDomainDef *def,
-                            virQEMUCaps *qemuCaps)
+                            virQEMUCaps *qemuCaps,
+                            virDomainObj *vm)
{
    size_t i;
    g_autoptr(virJSONValue) props = NULL;
    int iommufd = 0;
    const char * iommufdId = "iommufd0";
+    qemuDomainObjPrivate *priv = vm->privateData;

    for (i = 0; i < def->nhostdevs; i++) {
        virDomainHostdevDef *hostdev = def->hostdevs[i];
@@ -5233,8 +5255,10 @@ qemuBuildHostdevCommandLine(virCommand *cmd,

            if (subsys->u.pci.driver.iommufd && iommufd == 0) {
                iommufd = 1;
+                virCommandPassFD(cmd, priv->iommufd, VIR_COMMAND_PASS_FD_CLOSE_PARENT);
                if (qemuMonitorCreateObjectProps(&props, "iommufd",
                                                 iommufdId,
+                                                 "S:fd", g_strdup_printf("%d", priv->iommufd),
                                                 NULL) < 0)
                    return -1;

@@ -5245,7 +5269,18 @@ qemuBuildHostdevCommandLine(virCommand *cmd,
            if (qemuCommandAddExtDevice(cmd, hostdev->info, def, qemuCaps) < 0)
                return -1;

-            if (!(devprops = qemuBuildPCIHostdevDevProps(def, hostdev)))
+            if (subsys->u.pci.driver.iommufd) {
+                virDomainHostdevSubsysPCI *pcisrc = &hostdev-
source.subsys.u.pci;
+                g_autofree char *vfioFdName = g_strdup_printf("vfio- %04x:%02x:%02x.%d", +                                                              pcisrc- >addr.domain, pcisrc->addr.bus, +                                                              pcisrc- >addr.slot, pcisrc->addr.function);
+
+                int vfiofd = GPOINTER_TO_INT(g_hash_table_lookup(priv->vfioDeviceFds, vfioFdName));
+
+                virCommandPassFD(cmd, vfiofd, VIR_COMMAND_PASS_FD_CLOSE_PARENT);

This would become just:

qemuDomainHostdevPrivate *priv = (qemuDomainHostdevPrivate *)vsock-
privateData;

virCommandPassFD(cmd, priv->vfiofd, VIR_COMMAND_PASS_FD_CLOSE_PARENT);


I will proceed with implementing a qemuDomainHostdevPrivate struct and look into the existing implementation for qemuDomainDiskPrivate for reference. I was not able to find a private data attribute in the _virDomainHostdevDef struct definition, but I do see "virObject *privateData;" under the _virDomainDiskDef struct definition - are you aware of any reason behind this, or has it just never been needed for the hostdev struct?


It was added for disks at the time when it was needed.

Jano

+            }
+
+            if (!(devprops = qemuBuildPCIHostdevDevProps(def, hostdev, vm)))
                return -1;

Attachment: signature.asc
Description: PGP signature

Reply via email to