On 4/14/2025 7:32 AM, Alejandro Jimenez wrote:
A guest must issue an INVALIDATE_DEVTAB_ENTRY command after changing a
Device Table entry (DTE), e.g. after attaching a device and setting up
its DTE. When intercepting this event, determine whether the DTE has been
configured for paging, and toggle the appropriate memory regions to allow
DMA address translation for the address space if needed.
Signed-off-by: Alejandro Jimenez <alejandro.j.jime...@oracle.com>
---
hw/i386/amd_iommu.c | 68 ++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 67 insertions(+), 1 deletion(-)
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 3bfa08419ffe..abdd67f6b12c 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -101,6 +101,8 @@ static void amdvi_sync_shadow_page_table_range(AMDVIAddressSpace *as,
                                                uint64_t *dte, hwaddr addr,
                                                uint64_t size, bool send_unmap);
static void amdvi_address_space_unmap(AMDVIAddressSpace *as, IOMMUNotifier *n);
+static void amdvi_address_space_sync(AMDVIAddressSpace *as);
+static void amdvi_switch_address_space(AMDVIAddressSpace *amdvi_as);

uint64_t amdvi_extended_feature_register(AMDVIState *s)
{
@@ -432,7 +434,15 @@ static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd)
    trace_amdvi_completion_wait(addr, data);
}
-/* log error without aborting since linux seems to be using reserved bits */
+/*
+ * A guest driver must issue the INVALIDATE_DEVTAB_ENTRY command to the IOMMU
+ * after changing a Device Table entry. We can use this fact to detect when a
+ * Device Table entry is created for a device attached to a paging domain,
+ * and enable the corresponding IOMMU memory region to allow for DMA
+ * translation if appropriate.
+ *
+ * log error without aborting since linux seems to be using reserved bits
+ */
static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd)
{
uint16_t devid = cpu_to_le16((uint16_t)extract64(cmd[0], 0, 16));
@@ -442,6 +452,62 @@ static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd)
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
+
+ /*
+ * Convert the devid encoded in the command to a bus and devfn in
+ * order to retrieve the corresponding address space.
+ */
+ uint8_t bus_num, devfn, dte_mode;
+ AMDVIAddressSpace *as;
+ uint64_t dte[4] = { 0 };
+ IOMMUNotifier *n;
+ int ret;
+
+ bus_num = PCI_BUS_NUM(devid);
+ devfn = devid & 0xff;
+
+ /*
+ * The main buffer of size (AMDVIAddressSpace *) * (PCI_BUS_MAX) has already
+ * been allocated within AMDVIState, but we must be careful not to access an
+ * unallocated devfn.
+ */
+ if (!s->address_spaces[bus_num] || !s->address_spaces[bus_num][devfn]) {
+ return;
+ }
+ as = s->address_spaces[bus_num][devfn];
+
+ ret = amdvi_as_to_dte(as, dte);
+
+ if (!ret) {
+ dte_mode = (dte[0] >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK;
+ }
+
+ if ((ret < 0) || (!ret && !dte_mode)) {
+ /*
+ * The DTE could not be retrieved, it is not valid, or it is not set up
+ * for paging. In any of these cases, ensure that if paging was previously
+ * in use we switch to the no_dma memory region and invalidate all
+ * existing mappings.
+ */
+ if (as->addr_translation) {
+ as->addr_translation = false;
+
+ amdvi_switch_address_space(as);
+
+ IOMMU_NOTIFIER_FOREACH(n, &as->iommu) {
+ amdvi_address_space_unmap(as, n);
+ }
Hi,

I think amdvi_switch_address_space() should come after
amdvi_address_space_unmap(). amdvi_switch_address_space() unregisters the
VFIO notifier, hence the mr->iommu_notify list is empty and we do not
unmap the shadow page table.

The code works fine because vfio_iommu_map_notify eventually maps the
entire address space, but we should keep the right ordering.

Regards
Sairaj Kodilkar
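
For illustration, the reordering suggested above might look roughly like
the following (just a sketch reusing the helpers introduced by this patch,
not a tested change):

    if (as->addr_translation) {
        as->addr_translation = false;

        /* Unmap the shadow mappings while VFIO's notifier is still registered */
        IOMMU_NOTIFIER_FOREACH(n, &as->iommu) {
            amdvi_address_space_unmap(as, n);
        }

        /* Switch to the no_dma region afterwards; this unregisters the notifier */
        amdvi_switch_address_space(as);
    }
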
+ }
+ } else if (!as->addr_translation) {
+ /*
+ * Installing a DTE that enables translation where it wasn't previously
+ * active. Activate the DMA memory region.
+ */
+ as->addr_translation = true;
+ amdvi_switch_address_space(as);
+ amdvi_address_space_sync(as);
+ }
+
    trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid),
                             PCI_FUNC(devid));
}