>-----Original Message-----
>From: Eric Auger <eric.au...@redhat.com>
>Subject: Re: [PATCH rfcv2 13/20] intel_iommu: Add PASID cache management
>infrastructure
>
>
>
>
>On 2/19/25 9:22 AM, Zhenzhong Duan wrote:
>> This adds an new entry VTDPASIDCacheEntry in VTDAddressSpace to cache the
>> pasid entry and track PASID usage and future PASID tagged DMA address
>> translation support in vIOMMU.
>>
>> VTDAddressSpace of PCI_NO_PASID is allocated when device is plugged and
>> never freed. For other pasid, VTDAddressSpace instance is created/destroyed
>> per the guest pasid entry set up/destroy for passthrough devices. While for
>> emulated devices, VTDAddressSpace instance is created in the PASID tagged
>DMA
>> translation and be destroyed per guest PASID cache invalidation. This focuses
>> on the PASID cache management for passthrough devices as there is no PASID
>> capable emulated devices yet.
>>
>> When guest modifies a PASID entry, QEMU will capture the guest pasid
>selective
>> pasid cache invalidation, allocate or remove a VTDAddressSpace instance per
>the
>> invalidation reasons:
>>
>>     *) a present pasid entry moved to non-present
>>     *) a present pasid entry to be a present entry
>>     *) a non-present pasid entry moved to present
>>
>> vIOMMU emulator could figure out the reason by fetching latest guest pasid
>entry
>> and compare it with the PASID cache.
>>
>> Signed-off-by: Yi Liu <yi.l....@intel.com>
>> Signed-off-by: Yi Sun <yi.y....@linux.intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com>
>> ---
>>  hw/i386/intel_iommu_internal.h |  29 ++
>>  include/hw/i386/intel_iommu.h  |   6 +
>>  hw/i386/intel_iommu.c          | 484 ++++++++++++++++++++++++++++++++-
>Don't you have ways to split this patch. It has a huge change set and
>this is really heavy to digest at once (at least for me).

Sure, will try.

>>  hw/i386/trace-events           |   4 +
>>  4 files changed, 513 insertions(+), 10 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>> index 18bc22fc72..632fda2853 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -315,6 +315,7 @@ typedef enum VTDFaultReason {
>>                                    * request while disabled */
>>      VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
>>
>> +    VTD_FR_RTADDR_INV_TTM = 0x31,  /* Invalid TTM in RTADDR */
>>      /* PASID directory entry access failure */
>>      VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
>>      /* The Present(P) field of pasid directory entry is 0 */
>> @@ -492,6 +493,15 @@ typedef union VTDInvDesc VTDInvDesc;
>>  #define VTD_INV_DESC_PIOTLB_RSVD_VAL0     0xfff000000000f1c0ULL
>>  #define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
>>
>> +#define VTD_INV_DESC_PASIDC_G          (3ULL << 4)
>> +#define VTD_INV_DESC_PASIDC_PASID(val) (((val) >> 32) & 0xfffffULL)
>> +#define VTD_INV_DESC_PASIDC_DID(val)   (((val) >> 16) &
>VTD_DOMAIN_ID_MASK)
>> +#define VTD_INV_DESC_PASIDC_RSVD_VAL0  0xfff000000000f1c0ULL
>> +
>> +#define VTD_INV_DESC_PASIDC_DSI        (0ULL << 4)
>> +#define VTD_INV_DESC_PASIDC_PASID_SI   (1ULL << 4)
>> +#define VTD_INV_DESC_PASIDC_GLOBAL     (3ULL << 4)
>> +
>>  /* Information about page-selective IOTLB invalidate */
>>  struct VTDIOTLBPageInvInfo {
>>      uint16_t domain_id;
>> @@ -548,10 +558,28 @@ typedef struct VTDRootEntry VTDRootEntry;
>>  #define VTD_CTX_ENTRY_LEGACY_SIZE     16
>>  #define VTD_CTX_ENTRY_SCALABLE_SIZE   32
>>
>> +#define VTD_SM_CONTEXT_ENTRY_PDTS(val)      (((val) >> 9) & 0x7)
>>  #define VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK 0xfffff
>>  #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL |
>~VTD_HAW_MASK(aw))
>>  #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
>>
>> +typedef enum VTDPCInvType {
>> +    /* force reset all */
>> +    VTD_PASID_CACHE_FORCE_RESET = 0,
>> +    /* pasid cache invalidation rely on guest PASID entry */
>> +    VTD_PASID_CACHE_GLOBAL_INV,
>> +    VTD_PASID_CACHE_DOMSI,
>> +    VTD_PASID_CACHE_PASIDSI,
>> +} VTDPCInvType;
>> +
>> +typedef struct VTDPASIDCacheInfo {
>> +    VTDPCInvType type;
>> +    uint16_t domain_id;
>> +    uint32_t pasid;
>> +    PCIBus *bus;
>> +    uint16_t devfn;
>> +} VTDPASIDCacheInfo;
>> +
>>  /* PASID Table Related Definitions */
>>  #define VTD_PASID_DIR_BASE_ADDR_MASK  (~0xfffULL)
>>  #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL)
>> @@ -563,6 +591,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>>  #define VTD_PASID_TABLE_BITS_MASK     (0x3fULL)
>>  #define VTD_PASID_TABLE_INDEX(pasid)  ((pasid) &
>VTD_PASID_TABLE_BITS_MASK)
>>  #define VTD_PASID_ENTRY_FPD           (1ULL << 1) /* Fault Processing 
>> Disable
>*/
>> +#define VTD_PASID_TBL_ENTRY_NUM       (1ULL << 6)
>>
>>  /* PASID Granular Translation Type Mask */
>>  #define VTD_PASID_ENTRY_P              1ULL
>> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
>> index 50f9b27a45..fbc9da903a 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -95,6 +95,11 @@ struct VTDPASIDEntry {
>>      uint64_t val[8];
>>  };
>>
>> +typedef struct VTDPASIDCacheEntry {
>> +    struct VTDPASIDEntry pasid_entry;
>> +    bool cache_filled;
>> +} VTDPASIDCacheEntry;
>> +
>>  struct VTDAddressSpace {
>>      PCIBus *bus;
>>      uint8_t devfn;
>> @@ -107,6 +112,7 @@ struct VTDAddressSpace {
>>      MemoryRegion iommu_ir_fault; /* Interrupt region for catching fault */
>>      IntelIOMMUState *iommu_state;
>>      VTDContextCacheEntry context_cache_entry;
>> +    VTDPASIDCacheEntry pasid_cache_entry;
>>      QLIST_ENTRY(VTDAddressSpace) next;
>>      /* Superset of notifier flags that this address space has */
>>      IOMMUNotifierFlag notifier_flags;
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index fafa199f52..b8f3b85803 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -86,6 +86,8 @@ struct vtd_iotlb_key {
>>  static void vtd_address_space_refresh_all(IntelIOMMUState *s);
>>  static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier
>*n);
>>
>> +static void vtd_pasid_cache_reset(IntelIOMMUState *s);
>use _locked suffix to be consistent with the others and emphases the
>lock is held?

Will do.

>> +
>>  static void vtd_panic_require_caching_mode(void)
>>  {
>>      error_report("We need to set caching-mode=on for intel-iommu to enable "
>> @@ -390,6 +392,7 @@ static void vtd_reset_caches(IntelIOMMUState *s)
>>      vtd_iommu_lock(s);
>>      vtd_reset_iotlb_locked(s);
>>      vtd_reset_context_cache_locked(s);
>> +    vtd_pasid_cache_reset(s);
>>      vtd_iommu_unlock(s);
>>  }
>>
>> @@ -825,6 +828,16 @@ static inline bool
>vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
>>      }
>>  }
>>
>> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
>> +{
>> +    return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce->val[0]) + 7);
>> +}
>> +
>> +static inline uint16_t vtd_pe_get_domain_id(VTDPASIDEntry *pe)
>
>nit: vtd_pe_get_did as the filed is named DID?

Will do.

>
>> +{
>> +    return VTD_SM_PASID_ENTRY_DID((pe)->val[1]);
>> +}
>> +
>>  static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
>>  {
>>      return pdire->val & 1;
>> @@ -1617,6 +1630,54 @@ static int
>vtd_as_to_context_entry(VTDAddressSpace *vtd_as, VTDContextEntry *ce)
>>      }
>>  }
>>
>> +/* Translate to iommu pasid if PCI_NO_PASID */
>I don't really get the comment above. Ay best, shouldn't it be put in

Will do, it's pci's pasid value vs. guest's pasid value which I call iommu 
pasid.

>
>(vtd_as->pasid == PCI_NO_PASID) block?
>What you call "iommu pasid" is the value set in RID_PASID, right? Is "iommu
>pasid" a conventional terminology?

PCI subsystem uses PCI_NO_PASID(-1) to represent a Requests-without-PASID,
but guest doesn't recognize PCI_NO_PASID(-1), so vIOMMU has to translate
PCI_NO_PASID(-1) to an normal pasid value(pasid >= 0), I call it iommu pasid
to distinguish with PCI's pasid, it's not conventional terminology.

>
>> +static int vtd_as_to_iommu_pasid(VTDAddressSpace *vtd_as, uint32_t *pasid)
>> +{
>> +    VTDContextEntry ce;
>> +    int ret;
>> +
>> +    ret = vtd_as_to_context_entry(vtd_as, &ce);
>> +    if (ret) {
>> +        return ret;
>> +    }
>> +
>> +    if (vtd_as->pasid == PCI_NO_PASID) {
>> +        *pasid = VTD_CE_GET_RID2PASID(&ce);
>This is called RID_PASID in the spec. I think it would be easier for the
>reader if could have a direct match with named fields so that we can
>easily seach the spec.

I agree with you about the match, but rid2pasid is wildly used in intel_iommu.c
for a long history. I'd like to here more voice before renaming.

>> +    } else {
>> +        *pasid = vtd_as->pasid;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static gboolean vtd_find_as_by_sid_and_iommu_pasid(gpointer key, gpointer
>value,
>> +                                                   gpointer user_data)
>why iommu_pasid and not directly pasid?

Because vtd_as->pasid is PCI's pasid, it needs to be transformed into iommu 
pasid.

>> +{
>> +    VTDAddressSpace *vtd_as = (VTDAddressSpace *)value;
>> +    struct vtd_as_raw_key *target = (struct vtd_as_raw_key *)user_data;
>> +    uint16_t sid = PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn);
>> +    uint32_t pasid;
>> +
>> +    if (vtd_as_to_iommu_pasid(vtd_as, &pasid)) {
>> +        return false;
>> +    }
>> +
>> +    return (pasid == target->pasid) && (sid == target->sid);
>> +}
>> +
>> +/* Translate iommu pasid to vtd_as */
>> +static VTDAddressSpace *vtd_as_from_iommu_pasid(IntelIOMMUState *s,
>> +                                                uint16_t sid, uint32_t 
>> pasid)
>> +{
>> +    struct vtd_as_raw_key key = {
>> +        .sid = sid,
>> +        .pasid = pasid
>> +    };
>> +
>> +    return g_hash_table_find(s->vtd_address_spaces,
>> +                             vtd_find_as_by_sid_and_iommu_pasid, &key);
>> +}
>> +
>>  static int vtd_sync_shadow_page_hook(const IOMMUTLBEvent *event,
>>                                       void *private)
>>  {
>> @@ -3062,6 +3123,412 @@ static bool
>vtd_process_piotlb_desc(IntelIOMMUState *s,
>>      return true;
>>  }
>>
>> +static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
>> +                                            uint32_t pasid, VTDPASIDEntry 
>> *pe)
>does "pe" means pasid entry? It is not obvious for a dummy reader like
>me. May be worth a comment at least once.

May VTDPASIDEntry have given you enough hint?

>> +{
>> +    IntelIOMMUState *s = vtd_as->iommu_state;
>> +    VTDContextEntry ce;
>> +    int ret;
>> +
>> +    if (!s->root_scalable) {
>> +        return -VTD_FR_RTADDR_INV_TTM;
>> +    }
>> +
>> +    ret = vtd_as_to_context_entry(vtd_as, &ce);
>> +    if (ret) {
>> +        return ret;
>> +    }
>> +
>> +    return vtd_ce_get_pasid_entry(s, &ce, pe, pasid);
>> +}
>> +
>> +static bool vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
>> +{
>> +    return !memcmp(p1, p2, sizeof(*p1));
>> +}
>> +
>> +/*
>> + * This function fills in the pasid entry in &vtd_as. Caller
>> + * of this function should hold iommu_lock.
>> + */
>> +static void vtd_fill_pe_in_cache(IntelIOMMUState *s, VTDAddressSpace
>*vtd_as,
>> +                                 VTDPASIDEntry *pe)
>seems some other functions used the _locked suffix

Yes, vtd_pasid_cache_reset_locked() is, so no need to add suffix here, right?

>> +{
>> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
>> +
>> +    if (vtd_pasid_entry_compare(pe, &pc_entry->pasid_entry)) {
>> +        /* No need to go further as cached pasid entry is latest */
>> +        return;
>> +    }
>> +
>> +    pc_entry->pasid_entry = *pe;
>> +    pc_entry->cache_filled = true;
>> +    /*
>> +     * TODO: send pasid bind to host for passthru devices
>> +     */
>what does it mean?

Here means a new entry is found, we need to send pasid<->hwpt binding
to host.

>> +}
>> +
>> +/*
>> + * This function is used to clear cached pasid entry in vtd_as
>> + * instances. Caller of this function should hold iommu_lock.
>> + */
>> +static gboolean vtd_flush_pasid(gpointer key, gpointer value,
>> +                                gpointer user_data)
>> +{
>> +    VTDPASIDCacheInfo *pc_info = user_data;
>> +    VTDAddressSpace *vtd_as = value;
>> +    IntelIOMMUState *s = vtd_as->iommu_state;
>> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
>> +    VTDPASIDEntry pe;
>> +    uint16_t did;
>> +    uint32_t pasid;
>> +    int ret;
>> +
>> +    /* Replay only fill pasid entry cache for passthrough device */
>filled

Will do.

Thanks
Zhenzhong

Reply via email to