On a system affected by ERRATA_772415, IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 is reported by IOMMU_DEVICE_GET_HW_INFO. Due to this erratum, even a read-only range mapped in the stage-2 page table can still be written.
Reference from 4th Gen Intel Xeon Processor Scalable Family Specification Update, Errata Details, SPR17.

[0] https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/eagle-stream/sapphire-rapids-specification-update

We utilize the newly added IOMMUFD container/ioas/hwpt management framework in VTD. Add a check to create a new VTDIOASContainer that holds RW-only mappings; this VTDIOASContainer can then be used as the backend for devices with ERRATA_772415. See the diagram below for details:

                      IntelIOMMUState
                             |
                             V
          .------------------.    .------------------.    .-------------------.
          | VTDIOASContainer |--->| VTDIOASContainer |--->| VTDIOASContainer  |-->...
          | (iommufd0,RW&RO) |    | (iommufd1,RW&RO) |    | (iommufd0,RW only)|
          .------------------.    .------------------.    .-------------------.
                 |                       |                          |
                 |                       .-->...                    |
                 V                                                  V
      .-------------------.    .-------------------.       .---------------.
      |   VTDS2Hwpt(CC)   |--->| VTDS2Hwpt(non-CC) |-->... | VTDS2Hwpt(CC) |-->...
      .-------------------.    .-------------------.       .---------------.
          |            |               |                          |
          |            |               |                          |
    .-----------.  .-----------.  .------------.           .------------.
    | IOMMUFD   |  | IOMMUFD   |  | IOMMUFD    |           | IOMMUFD    |
    | Device(CC)|  | Device(CC)|  | Device     |           | Device(CC) |
    | (iommufd0)|  | (iommufd0)|  | (non-CC)   |           | (errata)   |
    |           |  |           |  | (iommufd0) |           | (iommufd0) |
    .-----------.  .-----------.  .------------.           .------------.

Change vtd_check_hiod() to take a VTDHostIOMMUDevice pointer so that the errata value can be saved.
Suggested-by: Yi Liu <yi.l....@intel.com> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com> --- hw/i386/intel_iommu_internal.h | 1 + include/hw/i386/intel_iommu.h | 1 + hw/i386/intel_iommu.c | 26 +++++++++++++++++++------- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 23b7e236b0..8558781af8 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -654,5 +654,6 @@ typedef struct VTDHostIOMMUDevice { PCIBus *bus; uint8_t devfn; HostIOMMUDevice *hiod; + uint32_t errata; } VTDHostIOMMUDevice; #endif diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index 594281c1d3..9b156dc32e 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -103,6 +103,7 @@ typedef struct VTDPASIDCacheEntry { typedef struct VTDIOASContainer { struct IOMMUFDBackend *iommufd; uint32_t ioas_id; + uint32_t errata; MemoryListener listener; QLIST_HEAD(, VTDS2Hwpt) s2_hwpt_list; QLIST_ENTRY(VTDIOASContainer) next; diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index e36ac44110..dae1716629 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2443,7 +2443,8 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s) } #ifdef CONFIG_IOMMUFD -static bool iommufd_listener_skipped_section(MemoryRegionSection *section) +static bool iommufd_listener_skipped_section(VTDIOASContainer *container, + MemoryRegionSection *section) { return !memory_region_is_ram(section->mr) || memory_region_is_protected(section->mr) || @@ -2453,7 +2454,8 @@ static bool iommufd_listener_skipped_section(MemoryRegionSection *section) * are never accessed by the CPU and beyond the address width of * some IOMMU hardware. TODO: VFIO should tell us the IOMMU width. 
*/ - section->offset_within_address_space & (1ULL << 63); + section->offset_within_address_space & (1ULL << 63) || + (container->errata && section->readonly); } static void iommufd_listener_region_add_s2domain(MemoryListener *listener, @@ -2469,7 +2471,7 @@ static void iommufd_listener_region_add_s2domain(MemoryListener *listener, Error *err = NULL; int ret; - if (iommufd_listener_skipped_section(section)) { + if (iommufd_listener_skipped_section(container, section)) { return; } iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); @@ -2520,7 +2522,7 @@ static void iommufd_listener_region_del_s2domain(MemoryListener *listener, Int128 llend, llsize; int ret; - if (iommufd_listener_skipped_section(section)) { + if (iommufd_listener_skipped_section(container, section)) { return; } iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); @@ -2776,7 +2778,8 @@ static int vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod, /* try to attach to an existing container in this space */ QLIST_FOREACH(container, &s->containers, next) { - if (container->iommufd != iommufd) { + if (container->iommufd != iommufd || + container->errata != vtd_hiod->errata) { continue; } @@ -2803,6 +2806,7 @@ static int vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod, container = g_malloc0(sizeof(*container)); container->iommufd = iommufd; container->ioas_id = ioas_id; + container->errata = vtd_hiod->errata; QLIST_INIT(&container->s2_hwpt_list); if (vtd_device_attach_container(vtd_hiod, container, pasid, pe, hwpt, @@ -5329,9 +5333,10 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, return vtd_dev_as; } -static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod, +static bool vtd_check_hiod(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod, Error **errp) { + HostIOMMUDevice *hiod = vtd_hiod->hiod; HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod); int ret; @@ -5388,6 +5393,12 @@ static bool vtd_check_hiod(IntelIOMMUState *s, 
HostIOMMUDevice *hiod, return false; } + ret = hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_ERRATA, errp); + if (ret < 0) { + return false; + } + vtd_hiod->errata = ret; + error_setg(errp, "host device is uncompatible with stage-1 translation"); return false; } @@ -5419,7 +5430,8 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn, vtd_hiod->iommu_state = s; vtd_hiod->hiod = hiod; - if (!vtd_check_hiod(s, hiod, errp)) { + if (!vtd_check_hiod(s, vtd_hiod, errp)) { + g_free(vtd_hiod); vtd_iommu_unlock(s); return false; } -- 2.34.1