{
g_autoptr(virJSONValue) props = NULL;
virDomainHostdevSubsysPCI *pcisrc = &dev->source.subsys.u.pci;
@@ -4741,6 +4742,13 @@ qemuBuildPCIHostdevDevProps(const virDomainDef
*def,
const char *iommufdId = NULL;
/* 'ramfb' property must be omitted unless it's to be enabled */
bool ramfb = pcisrc->ramfb == VIR_TRISTATE_SWITCH_ON;
+ bool useIommufd = false;
+ qemuDomainObjPrivate *priv = vm ? vm->privateData : NULL;
+
+ if (pcisrc->driver.name ==
VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO &&
+ pcisrc->driver.iommufd) {
+ useIommufd = true;
+ }
/* caller has to assign proper passthrough driver name */
switch (pcisrc->driver.name) {
@@ -4787,6 +4795,18 @@ qemuBuildPCIHostdevDevProps(const virDomainDef
*def,
NULL) < 0)
return NULL;
+ if (useIommufd && priv) {
+ g_autofree char *vfioFdName = g_strdup_printf("vfio-%04x:
%02x:%02x.%d",
+ pcisrc-
>addr.domain, pcisrc->addr.bus,
+ pcisrc-
>addr.slot, pcisrc->addr.function);
+
There's no need to duplicate the list of hostdevs which use iommufd in a
per-domain hash table.
For storing per-device file descriptors, we have per-device private
data.
+ int vfiofd = GPOINTER_TO_INT(g_hash_table_lookup(priv-
>vfioDeviceFds, vfioFdName));
+ if (virJSONValueObjectAdd(&props,
+ "S:fd", g_strdup_printf("%d", vfiofd),
+ NULL) < 0)
+ return NULL;
+ }
+
if (qemuBuildDeviceAddressProps(props, def, dev->info) < 0)
return NULL;
@@ -5197,12 +5217,14 @@ qemuBuildAcpiNodesetProps(virCommand *cmd,
static int
qemuBuildHostdevCommandLine(virCommand *cmd,
const virDomainDef *def,
- virQEMUCaps *qemuCaps)
+ virQEMUCaps *qemuCaps,
+ virDomainObj *vm)
{
size_t i;
g_autoptr(virJSONValue) props = NULL;
int iommufd = 0;
const char * iommufdId = "iommufd0";
+ qemuDomainObjPrivate *priv = vm->privateData;
for (i = 0; i < def->nhostdevs; i++) {
virDomainHostdevDef *hostdev = def->hostdevs[i];
@@ -5233,8 +5255,10 @@ qemuBuildHostdevCommandLine(virCommand *cmd,
if (subsys->u.pci.driver.iommufd && iommufd == 0) {
iommufd = 1;
+ virCommandPassFD(cmd, priv->iommufd,
VIR_COMMAND_PASS_FD_CLOSE_PARENT);
if (qemuMonitorCreateObjectProps(&props, "iommufd",
iommufdId,
+ "S:fd",
g_strdup_printf("%d", priv->iommufd),
NULL) < 0)
return -1;
@@ -5245,7 +5269,18 @@ qemuBuildHostdevCommandLine(virCommand *cmd,
if (qemuCommandAddExtDevice(cmd, hostdev->info, def,
qemuCaps) < 0)
return -1;
- if (!(devprops = qemuBuildPCIHostdevDevProps(def, hostdev)))
+ if (subsys->u.pci.driver.iommufd) {
+ virDomainHostdevSubsysPCI *pcisrc = &hostdev-
>source.subsys.u.pci;
+ g_autofree char *vfioFdName = g_strdup_printf("vfio-
%04x:%02x:%02x.%d",
+ pcisrc-
>addr.domain, pcisrc->addr.bus,
+ pcisrc-
>addr.slot, pcisrc->addr.function);
+
+ int vfiofd =
GPOINTER_TO_INT(g_hash_table_lookup(priv->vfioDeviceFds, vfioFdName));
+
+ virCommandPassFD(cmd, vfiofd,
VIR_COMMAND_PASS_FD_CLOSE_PARENT);
This would become just:
qemuDomainHostdevPrivate *priv = (qemuDomainHostdevPrivate *)vsock-
>privateData;
virCommandPassFD(cmd, priv->vfiofd, VIR_COMMAND_PASS_FD_CLOSE_PARENT);
+ }
+
+ if (!(devprops = qemuBuildPCIHostdevDevProps(def,
hostdev, vm)))
return -1;
if (qemuBuildDeviceCommandlineFromJSON(cmd, devprops, def,
qemuCaps) < 0)
@@ -10893,7 +10928,7 @@ qemuBuildCommandLine(virDomainObj *vm,
if (qemuBuildRedirdevCommandLine(cmd, def, qemuCaps) < 0)
return NULL;
- if (qemuBuildHostdevCommandLine(cmd, def, qemuCaps) < 0)
+ if (qemuBuildHostdevCommandLine(cmd, def, qemuCaps, vm) < 0)
return NULL;
if (migrateURI)
diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h
index ad068f1f16..380aac261f 100644
--- a/src/qemu/qemu_command.h
+++ b/src/qemu/qemu_command.h
@@ -180,7 +180,8 @@ qemuBuildThreadContextProps(virJSONValue **tcProps,
/* Current, best practice */
virJSONValue *
qemuBuildPCIHostdevDevProps(const virDomainDef *def,
- virDomainHostdevDef *dev);
+ virDomainHostdevDef *dev,
+ virDomainObj *vm);
virJSONValue *
qemuBuildRNGDevProps(const virDomainDef *def,
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index a42721efad..86640aa3e3 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -1953,6 +1953,11 @@ qemuDomainObjPrivateFree(void *data)
virChrdevFree(priv->devs);
+ if (priv->iommufd >= 0) {
+ virEventRemoveHandle(priv->iommufd);
There is no handle to remove (and none is needed). So no need for the
condition either.
+ priv->iommufd = -1;
+ }
+
if (priv->pidMonitored >= 0) {
virEventRemoveHandle(priv->pidMonitored);
priv->pidMonitored = -1;
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 45fc32a663..cecfed94a7 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -25,6 +25,7 @@
#include <unistd.h>
#include <signal.h>
#include <sys/stat.h>
+#include <dirent.h>
We should not need this in qemu_process.
#if WITH_SYS_SYSCALL_H
# include <sys/syscall.h>
#endif
@@ -8091,6 +8092,9 @@ qemuProcessLaunch(virConnectPtr conn,
if (qemuExtDevicesStart(driver, vm, incomingMigrationExtDevices) < 0)
goto cleanup;
+ if (qemuProcessOpenVfioFds(vm) < 0)
+ goto cleanup;
+
if (!(cmd = qemuBuildCommandLine(vm,
incoming ? "defer" : NULL,
vmop,
@@ -10267,3 +10271,231 @@
qemuProcessHandleNbdkitExit(qemuNbdkitProcess *nbdkit,
qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_NBDKIT_EXITED, 0, 0,
nbdkit);
virObjectUnlock(vm);
}
+
+/**
+ * qemuProcessOpenIommuFd:
+ * @vm: domain object
+ * @iommuFd: returned file descriptor
+ *
+ * Opens /dev/iommu file descriptor for the VM.
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+static int
+qemuProcessOpenIommuFd(virDomainObj *vm, int *iommuFd)
+{
+ int fd = -1;
+
+ VIR_DEBUG("Opening IOMMU FD for domain %s", vm->def->name);
+
+ if ((fd = open("/dev/iommu", O_RDWR | O_CLOEXEC)) < 0) {
+ if (errno == ENOENT) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("IOMMU FD support requires /dev/iommu
device"));
+ } else {
+ virReportSystemError(errno, "%s",
+ _("cannot open /dev/iommu"));
+ }
+ return -1;
+ }
+
+ *iommuFd = fd;
+ VIR_DEBUG("Opened IOMMU FD %d for domain %s", fd, vm->def->name);
+ return 0;
+}
+
+/**
+ * qemuProcessGetVfioDevicePath:
+ * @hostdev: host device definition
+ * @vfioPath: returned VFIO device path
+ *
+ * Constructs the VFIO device path for a PCI hostdev.
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+static int
+qemuProcessGetVfioDevicePath(virDomainHostdevDef *hostdev,
No need to pass the whole hostdev here. Then this function can live in
virpci.c
+ char **vfioPath)
+{
+ virPCIDeviceAddress *addr;
+ g_autofree char *sysfsPath = NULL;
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ int ret = -1;
+