>-----Original Message----- >From: Eric Auger <eric.au...@redhat.com> >Subject: Re: [PATCH rfcv2 13/20] intel_iommu: Add PASID cache management >infrastructure > > > > >On 2/19/25 9:22 AM, Zhenzhong Duan wrote: >> This adds an new entry VTDPASIDCacheEntry in VTDAddressSpace to cache the >> pasid entry and track PASID usage and future PASID tagged DMA address >> translation support in vIOMMU. >> >> VTDAddressSpace of PCI_NO_PASID is allocated when device is plugged and >> never freed. For other pasid, VTDAddressSpace instance is created/destroyed >> per the guest pasid entry set up/destroy for passthrough devices. While for >> emulated devices, VTDAddressSpace instance is created in the PASID tagged >DMA >> translation and be destroyed per guest PASID cache invalidation. This focuses >> on the PASID cache management for passthrough devices as there is no PASID >> capable emulated devices yet. >> >> When guest modifies a PASID entry, QEMU will capture the guest pasid >selective >> pasid cache invalidation, allocate or remove a VTDAddressSpace instance per >the >> invalidation reasons: >> >> *) a present pasid entry moved to non-present >> *) a present pasid entry to be a present entry >> *) a non-present pasid entry moved to present >> >> vIOMMU emulator could figure out the reason by fetching latest guest pasid >entry >> and compare it with the PASID cache. >> >> Signed-off-by: Yi Liu <yi.l....@intel.com> >> Signed-off-by: Yi Sun <yi.y....@linux.intel.com> >> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com> >> --- >> hw/i386/intel_iommu_internal.h | 29 ++ >> include/hw/i386/intel_iommu.h | 6 + >> hw/i386/intel_iommu.c | 484 ++++++++++++++++++++++++++++++++- >Don't you have ways to split this patch. It has a huge change set and >this is really heavy to digest at once (at least for me).
Sure, will try. >> hw/i386/trace-events | 4 + >> 4 files changed, 513 insertions(+), 10 deletions(-) >> >> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h >> index 18bc22fc72..632fda2853 100644 >> --- a/hw/i386/intel_iommu_internal.h >> +++ b/hw/i386/intel_iommu_internal.h >> @@ -315,6 +315,7 @@ typedef enum VTDFaultReason { >> * request while disabled */ >> VTD_FR_IR_SID_ERR = 0x26, /* Invalid Source-ID */ >> >> + VTD_FR_RTADDR_INV_TTM = 0x31, /* Invalid TTM in RTADDR */ >> /* PASID directory entry access failure */ >> VTD_FR_PASID_DIR_ACCESS_ERR = 0x50, >> /* The Present(P) field of pasid directory entry is 0 */ >> @@ -492,6 +493,15 @@ typedef union VTDInvDesc VTDInvDesc; >> #define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff000000000f1c0ULL >> #define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL >> >> +#define VTD_INV_DESC_PASIDC_G (3ULL << 4) >> +#define VTD_INV_DESC_PASIDC_PASID(val) (((val) >> 32) & 0xfffffULL) >> +#define VTD_INV_DESC_PASIDC_DID(val) (((val) >> 16) & >VTD_DOMAIN_ID_MASK) >> +#define VTD_INV_DESC_PASIDC_RSVD_VAL0 0xfff000000000f1c0ULL >> + >> +#define VTD_INV_DESC_PASIDC_DSI (0ULL << 4) >> +#define VTD_INV_DESC_PASIDC_PASID_SI (1ULL << 4) >> +#define VTD_INV_DESC_PASIDC_GLOBAL (3ULL << 4) >> + >> /* Information about page-selective IOTLB invalidate */ >> struct VTDIOTLBPageInvInfo { >> uint16_t domain_id; >> @@ -548,10 +558,28 @@ typedef struct VTDRootEntry VTDRootEntry; >> #define VTD_CTX_ENTRY_LEGACY_SIZE 16 >> #define VTD_CTX_ENTRY_SCALABLE_SIZE 32 >> >> +#define VTD_SM_CONTEXT_ENTRY_PDTS(val) (((val) >> 9) & 0x7) >> #define VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK 0xfffff >> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | >~VTD_HAW_MASK(aw)) >> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL >> >> +typedef enum VTDPCInvType { >> + /* force reset all */ >> + VTD_PASID_CACHE_FORCE_RESET = 0, >> + /* pasid cache invalidation rely on guest PASID entry */ >> + VTD_PASID_CACHE_GLOBAL_INV, >> + VTD_PASID_CACHE_DOMSI, >> + VTD_PASID_CACHE_PASIDSI, >> +} VTDPCInvType; >> + >> +typedef struct VTDPASIDCacheInfo { >> + VTDPCInvType type; >> + uint16_t domain_id; >> + uint32_t pasid; >> + PCIBus *bus; >> + uint16_t devfn; >> +} VTDPASIDCacheInfo; >> + >> /* PASID Table Related Definitions */ >> #define VTD_PASID_DIR_BASE_ADDR_MASK (~0xfffULL) >> #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL) >> @@ -563,6 +591,7 @@ typedef struct VTDRootEntry VTDRootEntry; >> #define VTD_PASID_TABLE_BITS_MASK (0x3fULL) >> #define VTD_PASID_TABLE_INDEX(pasid) ((pasid) & >VTD_PASID_TABLE_BITS_MASK) >> #define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing >> Disable >*/ >> +#define VTD_PASID_TBL_ENTRY_NUM (1ULL << 6) >> >> /* PASID Granular Translation Type Mask */ >> #define VTD_PASID_ENTRY_P 1ULL >> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h >> index 50f9b27a45..fbc9da903a 100644 >> --- a/include/hw/i386/intel_iommu.h >> +++ b/include/hw/i386/intel_iommu.h >> @@ -95,6 +95,11 @@ struct VTDPASIDEntry { >> uint64_t val[8]; >> }; >> >> +typedef struct VTDPASIDCacheEntry { >> + struct VTDPASIDEntry pasid_entry; >> + bool cache_filled; >> +} VTDPASIDCacheEntry; >> + >> struct VTDAddressSpace { >> PCIBus *bus; >> uint8_t devfn; >> @@ -107,6 +112,7 @@ struct VTDAddressSpace { >> MemoryRegion iommu_ir_fault; /* Interrupt region for catching fault */ >> IntelIOMMUState *iommu_state; >> VTDContextCacheEntry context_cache_entry; >> + VTDPASIDCacheEntry pasid_cache_entry; >> QLIST_ENTRY(VTDAddressSpace) next; >> /* Superset of notifier flags that this address space has */ >> IOMMUNotifierFlag notifier_flags; >> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >> index fafa199f52..b8f3b85803 100644 >> --- a/hw/i386/intel_iommu.c >> +++ b/hw/i386/intel_iommu.c >> @@ -86,6 +86,8 @@ struct vtd_iotlb_key { >> static void vtd_address_space_refresh_all(IntelIOMMUState *s); >> static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier >*n); >> >> +static void vtd_pasid_cache_reset(IntelIOMMUState *s); >use _locked suffix to be consistent with the others and emphases the >lock is held? Will do. >> + >> static void vtd_panic_require_caching_mode(void) >> { >> error_report("We need to set caching-mode=on for intel-iommu to enable " >> @@ -390,6 +392,7 @@ static void vtd_reset_caches(IntelIOMMUState *s) >> vtd_iommu_lock(s); >> vtd_reset_iotlb_locked(s); >> vtd_reset_context_cache_locked(s); >> + vtd_pasid_cache_reset(s); >> vtd_iommu_unlock(s); >> } >> >> @@ -825,6 +828,16 @@ static inline bool >vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe) >> } >> } >> >> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce) >> +{ >> + return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce->val[0]) + 7); >> +} >> + >> +static inline uint16_t vtd_pe_get_domain_id(VTDPASIDEntry *pe) > >nit: vtd_pe_get_did as the filed is named DID? Will do. > >> +{ >> + return VTD_SM_PASID_ENTRY_DID((pe)->val[1]); >> +} >> + >> static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire) >> { >> return pdire->val & 1; >> @@ -1617,6 +1630,54 @@ static int >vtd_as_to_context_entry(VTDAddressSpace *vtd_as, VTDContextEntry *ce) >> } >> } >> >> +/* Translate to iommu pasid if PCI_NO_PASID */ >I don't really get the comment above. Ay best, shouldn't it be put in Will do, it's pci's pasid value vs. guest's pasid value which I call iommu pasid. > >(vtd_as->pasid == PCI_NO_PASID) block? >What you call "iommu pasid" is the value set in RID_PASID, right? Is "iommu >pasid" a conventional terminology? PCI subsystem uses PCI_NO_PASID(-1) to represent a Requests-without-PASID, but guest doesn't recognize PCI_NO_PASID(-1), so vIOMMU has to translate PCI_NO_PASID(-1) to an normal pasid value(pasid >= 0), I call it iommu pasid to distinguish with PCI's pasid, it's not conventional terminology. > >> +static int vtd_as_to_iommu_pasid(VTDAddressSpace *vtd_as, uint32_t *pasid) >> +{ >> + VTDContextEntry ce; >> + int ret; >> + >> + ret = vtd_as_to_context_entry(vtd_as, &ce); >> + if (ret) { >> + return ret; >> + } >> + >> + if (vtd_as->pasid == PCI_NO_PASID) { >> + *pasid = VTD_CE_GET_RID2PASID(&ce); >This is called RID_PASID in the spec. I think it would be easier for the >reader if could have a direct match with named fields so that we can >easily seach the spec. I agree with you about the match, but rid2pasid is wildly used in intel_iommu.c for a long history. I'd like to here more voice before renaming. >> + } else { >> + *pasid = vtd_as->pasid; >> + } >> + >> + return 0; >> +} >> + >> +static gboolean vtd_find_as_by_sid_and_iommu_pasid(gpointer key, gpointer >value, >> + gpointer user_data) >why iommu_pasid and not directly pasid? Because vtd_as->pasid is PCI's pasid, it needs to be transformed into iommu pasid. >> +{ >> + VTDAddressSpace *vtd_as = (VTDAddressSpace *)value; >> + struct vtd_as_raw_key *target = (struct vtd_as_raw_key *)user_data; >> + uint16_t sid = PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn); >> + uint32_t pasid; >> + >> + if (vtd_as_to_iommu_pasid(vtd_as, &pasid)) { >> + return false; >> + } >> + >> + return (pasid == target->pasid) && (sid == target->sid); >> +} >> + >> +/* Translate iommu pasid to vtd_as */ >> +static VTDAddressSpace *vtd_as_from_iommu_pasid(IntelIOMMUState *s, >> + uint16_t sid, uint32_t >> pasid) >> +{ >> + struct vtd_as_raw_key key = { >> + .sid = sid, >> + .pasid = pasid >> + }; >> + >> + return g_hash_table_find(s->vtd_address_spaces, >> + vtd_find_as_by_sid_and_iommu_pasid, &key); >> +} >> + >> static int vtd_sync_shadow_page_hook(const IOMMUTLBEvent *event, >> void *private) >> { >> @@ -3062,6 +3123,412 @@ static bool >vtd_process_piotlb_desc(IntelIOMMUState *s, >> return true; >> } >> >> +static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as, >> + uint32_t pasid, VTDPASIDEntry >> *pe) >does "pe" means pasid entry? It is not obvious for a dummy reader like >me. May be worth a comment at least once. May VTDPASIDEntry have given you enough hint? >> +{ >> + IntelIOMMUState *s = vtd_as->iommu_state; >> + VTDContextEntry ce; >> + int ret; >> + >> + if (!s->root_scalable) { >> + return -VTD_FR_RTADDR_INV_TTM; >> + } >> + >> + ret = vtd_as_to_context_entry(vtd_as, &ce); >> + if (ret) { >> + return ret; >> + } >> + >> + return vtd_ce_get_pasid_entry(s, &ce, pe, pasid); >> +} >> + >> +static bool vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2) >> +{ >> + return !memcmp(p1, p2, sizeof(*p1)); >> +} >> + >> +/* >> + * This function fills in the pasid entry in &vtd_as. Caller >> + * of this function should hold iommu_lock. >> + */ >> +static void vtd_fill_pe_in_cache(IntelIOMMUState *s, VTDAddressSpace >*vtd_as, >> + VTDPASIDEntry *pe) >seems some other functions used the _locked suffix Yes, vtd_pasid_cache_reset_locked() is, so no need to add suffix here, right? >> +{ >> + VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry; >> + >> + if (vtd_pasid_entry_compare(pe, &pc_entry->pasid_entry)) { >> + /* No need to go further as cached pasid entry is latest */ >> + return; >> + } >> + >> + pc_entry->pasid_entry = *pe; >> + pc_entry->cache_filled = true; >> + /* >> + * TODO: send pasid bind to host for passthru devices >> + */ >what does it mean? Here means a new entry is found, we need to send pasid<->hwpt binding to host. >> +} >> + >> +/* >> + * This function is used to clear cached pasid entry in vtd_as >> + * instances. Caller of this function should hold iommu_lock. >> + */ >> +static gboolean vtd_flush_pasid(gpointer key, gpointer value, >> + gpointer user_data) >> +{ >> + VTDPASIDCacheInfo *pc_info = user_data; >> + VTDAddressSpace *vtd_as = value; >> + IntelIOMMUState *s = vtd_as->iommu_state; >> + VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry; >> + VTDPASIDEntry pe; >> + uint16_t did; >> + uint32_t pasid; >> + int ret; >> + >> + /* Replay only fill pasid entry cache for passthrough device */ >filled Will do. Thanks Zhenzhong