On Thu, Jan 14, 2016 at 12:47:24AM -0500, Jason Wang wrote: > The current intel_iommu only supports 4K pages, which may not be sufficient > to cover the guest working set. This patch tries to enable 2M and 1G mappings > for intel_iommu. This is also useful for a future device IOTLB > implementation to have a better hit rate. > > The major work is adding a page mask field to the IOTLB entry so that it > supports large pages, and using the slpte level as part of the key for IOTLB > lookup. MAMV was increased to 18 to support direct invalidation of 1G > mappings. > > Cc: Michael S. Tsirkin <m...@redhat.com> > Cc: Paolo Bonzini <pbonz...@redhat.com> > Cc: Richard Henderson <r...@twiddle.net> > Cc: Eduardo Habkost <ehabk...@redhat.com> > Signed-off-by: Jason Wang <jasow...@redhat.com>
Looks good, thanks! I was going to comment that changes such as MAMV would have to be versioned, but then I noticed that this device is currently unmigratable. So there is no issue for now, but we do need to fix migration for it. > --- > Test was done by virtio-net-pmd/vfio with 2M or 1G mapping in guest. > --- > hw/i386/intel_iommu.c | 76 > ++++++++++++++++++++++++++++++------------ > hw/i386/intel_iommu_internal.h | 6 ++-- > include/hw/i386/intel_iommu.h | 1 + > 3 files changed, 59 insertions(+), 24 deletions(-) > > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c > index 3fe27fa..68940a0 100644 > --- a/hw/i386/intel_iommu.c > +++ b/hw/i386/intel_iommu.c > @@ -152,14 +152,27 @@ static gboolean vtd_hash_remove_by_domain(gpointer key, > gpointer value, > return entry->domain_id == domain_id; > } > > +/* The shift of an addr for a certain level of paging structure */ > +static inline uint32_t vtd_slpt_level_shift(uint32_t level) > +{ > + return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS; > +} > + > +static inline uint64_t vtd_slpt_level_page_mask(uint32_t level) > +{ > + return ~((1ULL << vtd_slpt_level_shift(level)) - 1); > +} > + > static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value, > gpointer user_data) > { > VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value; > VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data; > - uint64_t gfn = info->gfn & info->mask; > + uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask; > + uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K; > return (entry->domain_id == info->domain_id) && > - ((entry->gfn & info->mask) == gfn); > + (((entry->gfn & info->mask) == gfn) || > + (entry->gfn == gfn_tlb)); > } > > /* Reset all the gen of VTDAddressSpace to zero and set the gen of > @@ -193,24 +206,46 @@ static void vtd_reset_iotlb(IntelIOMMUState *s) > g_hash_table_remove_all(s->iotlb); > } > > +static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint8_t source_id, > + uint32_t level) > +{ > + return gfn | 
((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) | > + ((uint64_t)(level) << VTD_IOTLB_LVL_SHIFT); > +} > + > +static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level) > +{ > + return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K; > +} > + > static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t > source_id, > hwaddr addr) > { > + VTDIOTLBEntry *entry; > uint64_t key; > + int level; > + > + for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) { > + key = vtd_get_iotlb_key(vtd_get_iotlb_gfn(addr, level), > + source_id, level); > + entry = g_hash_table_lookup(s->iotlb, &key); > + if (entry) { > + goto out; > + } > + } > > - key = (addr >> VTD_PAGE_SHIFT_4K) | > - ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT); > - return g_hash_table_lookup(s->iotlb, &key); > - > +out: > + return entry; > } > > static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, > uint16_t domain_id, hwaddr addr, uint64_t slpte, > - bool read_flags, bool write_flags) > + bool read_flags, bool write_flags, > + uint32_t level) > { > VTDIOTLBEntry *entry = g_malloc(sizeof(*entry)); > uint64_t *key = g_malloc(sizeof(*key)); > - uint64_t gfn = addr >> VTD_PAGE_SHIFT_4K; > + uint64_t gfn = vtd_get_iotlb_gfn(addr, level); > > VTD_DPRINTF(CACHE, "update iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64 > " slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr, slpte, > @@ -225,7 +260,8 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t > source_id, > entry->slpte = slpte; > entry->read_flags = read_flags; > entry->write_flags = write_flags; > - *key = gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT); > + entry->mask = vtd_slpt_level_page_mask(level); > + *key = vtd_get_iotlb_key(gfn, source_id, level); > g_hash_table_replace(s->iotlb, key, entry); > } > > @@ -500,12 +536,6 @@ static inline dma_addr_t > vtd_get_slpt_base_from_context(VTDContextEntry *ce) > return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR; > } > > -/* The shift of an addr for a certain 
level of paging structure */ > -static inline uint32_t vtd_slpt_level_shift(uint32_t level) > -{ > - return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS; > -} > - > static inline uint64_t vtd_get_slpte_addr(uint64_t slpte) > { > return slpte & VTD_SL_PT_BASE_ADDR_MASK; > @@ -761,7 +791,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace > *vtd_as, PCIBus *bus, > VTDContextEntry ce; > uint8_t bus_num = pci_bus_num(bus); > VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry; > - uint64_t slpte; > + uint64_t slpte, page_mask; > uint32_t level; > uint16_t source_id = vtd_make_source_id(bus_num, devfn); > int ret_fr; > @@ -801,6 +831,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace > *vtd_as, PCIBus *bus, > slpte = iotlb_entry->slpte; > reads = iotlb_entry->read_flags; > writes = iotlb_entry->write_flags; > + page_mask = iotlb_entry->mask; > goto out; > } > /* Try to fetch context-entry from cache first */ > @@ -847,12 +878,13 @@ static void vtd_do_iommu_translate(VTDAddressSpace > *vtd_as, PCIBus *bus, > return; > } > > + page_mask = vtd_slpt_level_page_mask(level); > vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte, > - reads, writes); > + reads, writes, level); > out: > - entry->iova = addr & VTD_PAGE_MASK_4K; > - entry->translated_addr = vtd_get_slpte_addr(slpte) & VTD_PAGE_MASK_4K; > - entry->addr_mask = ~VTD_PAGE_MASK_4K; > + entry->iova = addr & page_mask; > + entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask; > + entry->addr_mask = ~page_mask; > entry->perm = (writes ? 2 : 0) + (reads ? 
1 : 0); > } > > @@ -990,7 +1022,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState > *s, uint16_t domain_id, > > assert(am <= VTD_MAMV); > info.domain_id = domain_id; > - info.gfn = addr >> VTD_PAGE_SHIFT_4K; > + info.addr = addr; > info.mask = ~((1 << am) - 1); > g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info); > } > @@ -1916,7 +1948,7 @@ static void vtd_init(IntelIOMMUState *s) > s->iq_last_desc_type = VTD_INV_DESC_NONE; > s->next_frcd_reg = 0; > s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW | > - VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI; > + VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS; > s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; > > vtd_reset_context_cache(s); > diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h > index ba288ab..e5f514c 100644 > --- a/hw/i386/intel_iommu_internal.h > +++ b/hw/i386/intel_iommu_internal.h > @@ -113,6 +113,7 @@ > > /* The shift of source_id in the key of IOTLB hash table */ > #define VTD_IOTLB_SID_SHIFT 36 > +#define VTD_IOTLB_LVL_SHIFT 44 > #define VTD_IOTLB_MAX_SIZE 1024 /* Max size of the hash table */ > > /* IOTLB_REG */ > @@ -185,9 +186,10 @@ > #define VTD_CAP_ND (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL) > #define VTD_MGAW 39 /* Maximum Guest Address Width */ > #define VTD_CAP_MGAW (((VTD_MGAW - 1) & 0x3fULL) << 16) > -#define VTD_MAMV 9ULL > +#define VTD_MAMV 18ULL > #define VTD_CAP_MAMV (VTD_MAMV << 48) > #define VTD_CAP_PSI (1ULL << 39) > +#define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35)) > > /* Supported Adjusted Guest Address Widths */ > #define VTD_CAP_SAGAW_SHIFT 8 > @@ -320,7 +322,7 @@ typedef struct VTDInvDesc VTDInvDesc; > /* Information about page-selective IOTLB invalidate */ > struct VTDIOTLBPageInvInfo { > uint16_t domain_id; > - uint64_t gfn; > + uint64_t addr; > uint8_t mask; > }; > typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo; > diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h > index 
5dbadb7..b024ffa 100644 > --- a/include/hw/i386/intel_iommu.h > +++ b/include/hw/i386/intel_iommu.h > @@ -83,6 +83,7 @@ struct VTDIOTLBEntry { > uint64_t gfn; > uint16_t domain_id; > uint64_t slpte; > + uint64_t mask; > bool read_flags; > bool write_flags; > }; > -- > 1.8.3.1