On Mon, Jul 14, 2025 at 04:59:34PM +0100, Shameer Kolothum wrote: > From: Nicolin Chen <nicol...@nvidia.com> > > Implement a set_iommu_device callback: > -If found an existing viommu reuse that. > (Devices behind the same physical SMMU should share an S2 HWPT) > -Else, > Allocate a viommu with the nested parent S2 hwpt allocated by VFIO.
s/nested/nesting > Allocate bypass and abort hwpt. Let's spare some words explaining why they are needed: iommufd provides a vIOMMU model for nested translation support, where its object encapsulates an S2 nesting parent HWPT. In this mode, devices can't attach to the S2 HWPT directly, bypassing the iommufd vIOMMU object. Given that a vIOMMU object isn't directly attachable, allocate two proxy nested HWPTs (bypass and abort) for devices to attach. > @@ -7,6 +7,7 @@ > */ > > #include "qemu/osdep.h" > +#include "trace.h" > #include "qemu/error-report.h" Will look nicer in alphabetical order > static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *bs, SMMUPciBus > *sbus, > PCIBus *bus, int devfn) There seems to be an extra space in the 2nd line's indentations. > +static bool > +smmuv3_accel_dev_alloc_viommu(SMMUv3AccelDevice *accel_dev, > + HostIOMMUDeviceIOMMUFD *idev, Error **errp) Ditto > +{ > + struct iommu_hwpt_arm_smmuv3 bypass_data = { > + .ste = { SMMU_STE_CFG_BYPASS | SMMU_STE_VALID, 0x0ULL }, > + }; > + struct iommu_hwpt_arm_smmuv3 abort_data = { > + .ste = { SMMU_STE_VALID, 0x0ULL }, > + }; > + SMMUDevice *sdev = &accel_dev->sdev; > + SMMUState *bs = sdev->smmu; > + SMMUv3State *s = ARM_SMMUV3(bs); > + SMMUv3AccelState *s_accel = s->s_accel; > + uint32_t s2_hwpt_id = idev->hwpt_id; > + SMMUS2Hwpt *s2_hwpt; > + SMMUViommu *viommu; > + uint32_t viommu_id; > + > + if (s_accel->viommu) { > + accel_dev->viommu = s_accel->viommu; > + return true; > + } > + > + if (!iommufd_backend_alloc_viommu(idev->iommufd, idev->devid, > + IOMMU_VIOMMU_TYPE_ARM_SMMUV3, > + s2_hwpt_id, &viommu_id, errp)) { > + return false; > + } > + > + viommu = g_new0(SMMUViommu, 1); > + viommu->core.viommu_id = viommu_id; > + viommu->core.s2_hwpt_id = s2_hwpt_id; > + viommu->core.iommufd = idev->iommufd; > + > + if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, > + viommu->core.viommu_id, 0, > + IOMMU_HWPT_DATA_ARM_SMMUV3, > + sizeof(abort_data), &abort_data, > + &viommu->abort_hwpt_id, errp)) { > + goto free_viommu; > + } > + > + if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, > + viommu->core.viommu_id, 0, > + IOMMU_HWPT_DATA_ARM_SMMUV3, > + sizeof(bypass_data), &bypass_data, > + &viommu->bypass_hwpt_id, errp)) { > + goto free_abort_hwpt; > + } > + > + s2_hwpt = g_new(SMMUS2Hwpt, 1); > + s2_hwpt->iommufd = idev->iommufd; > + s2_hwpt->hwpt_id = s2_hwpt_id; s2_hwpt is core allocated now, so maybe we don't need this object. > + > + viommu->iommufd = idev->iommufd; > + viommu->s2_hwpt = s2_hwpt; > + > + s_accel->viommu = viommu; > + accel_dev->viommu = viommu; > + return true; > + > +free_abort_hwpt: > + iommufd_backend_free_id(idev->iommufd, viommu->abort_hwpt_id); > +free_viommu: > + iommufd_backend_free_id(idev->iommufd, viommu->core.viommu_id); > + g_free(viommu); > + return false; > +} > + > +static bool smmuv3_accel_set_iommu_device(PCIBus *bus, void *opaque, int > devfn, > + HostIOMMUDevice *hiod, Error > **errp) > +{ > + HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod); > + SMMUState *bs = opaque; > + SMMUv3State *s = ARM_SMMUV3(bs); > + SMMUv3AccelState *s_accel = s->s_accel; > + SMMUPciBus *sbus = smmu_get_sbus(bs, bus); > + SMMUv3AccelDevice *accel_dev = smmuv3_accel_get_dev(bs, sbus, bus, > devfn); > + SMMUDevice *sdev = &accel_dev->sdev; > + > + if (!idev) { > + return true; > + } > + > + if (accel_dev->idev) { > + if (accel_dev->idev != idev) { > + error_report("Device 0x%x already has an associated idev", > + smmu_get_sid(sdev)); > + return false; > + } else { > + return true; > + } > + } > + > + if (!smmuv3_accel_dev_alloc_viommu(accel_dev, idev, errp)) { > + error_report("Device 0x%x: Unable to alloc viommu", > smmu_get_sid(sdev)); > + return false; > + } > + > + accel_dev->idev = idev; > + QLIST_INSERT_HEAD(&s_accel->viommu->device_list, accel_dev, next); > + trace_smmuv3_accel_set_iommu_device(devfn, smmu_get_sid(sdev)); Since we need three direct copies of smmu_get_sid(), it'd be cleaner to have a local variable? + uint16_t sid = smmu_get_sid(sdev); Or should it have a validation of the sdev pointer like the unset() does? > + return true; > +} > + > +static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque, > + int devfn) > +{ > + SMMUState *bs = opaque; > + SMMUv3State *s = ARM_SMMUV3(bs); > + SMMUPciBus *sbus = g_hash_table_lookup(bs->smmu_pcibus_by_busptr, bus); > + SMMUv3AccelDevice *accel_dev; > + SMMUViommu *viommu; > + SMMUDevice *sdev; > + > + if (!sbus) { > + return; > + } > + > + sdev = sbus->pbdev[devfn]; > + if (!sdev) { > + return; > + } > + > + accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev); > + if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev, > + accel_dev->idev->hwpt_id, > + NULL)) { > + error_report("Unable to attach dev to the default HW pagetable"); > + } > + > + accel_dev->idev = NULL; > + QLIST_REMOVE(accel_dev, next); > + trace_smmuv3_accel_unset_iommu_device(devfn, smmu_get_sid(sdev)); > + > + viommu = s->s_accel->viommu; > + if (QLIST_EMPTY(&viommu->device_list)) { > + iommufd_backend_free_id(viommu->iommufd, viommu->bypass_hwpt_id); > + iommufd_backend_free_id(viommu->iommufd, viommu->abort_hwpt_id); > + iommufd_backend_free_id(viommu->iommufd, viommu->core.viommu_id); > + iommufd_backend_free_id(viommu->iommufd, viommu->s2_hwpt->hwpt_id); > + g_free(viommu->s2_hwpt); s2_hwpt should be core-managed, so its id should not be free-ed here at least. Thanks Nicolin