On 17.11.2023 10:47, Roger Pau Monne wrote: > --- a/xen/arch/x86/hvm/io.c > +++ b/xen/arch/x86/hvm/io.c > @@ -364,9 +364,20 @@ static const struct hvm_mmcfg *vpci_mmcfg_find(const > struct domain *d, > return NULL; > } > > -bool vpci_is_mmcfg_address(const struct domain *d, paddr_t addr) > +int vpci_subtract_mmcfg(const struct domain *d, struct rangeset *r)
While there, also add __hwdom_init? > @@ -447,62 +451,135 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain > *d) > if ( iommu_hwdom_passthrough ) > return; > > - max_pfn = (GB(4) >> PAGE_SHIFT) - 1; > - top = max(max_pdx, pfn_to_pdx(max_pfn) + 1); > + map = rangeset_new(NULL, NULL, 0); > + if ( !map ) > + panic("IOMMU initialization failed unable to allocate rangeset\n"); This reads a little odd, and could probably do with shortening anyway (e.g. "IOMMU init: unable to allocate rangeset\n"). > + if ( iommu_hwdom_inclusive ) > + { > + /* Add the whole range below 4GB, UNUSABLE regions will be removed. > */ > + rc = rangeset_add_range(map, 0, max_pfn); > + if ( rc ) > + panic("IOMMU inclusive mappings can't be added: %d\n", > + rc); > + } > > - for ( i = 0, start = 0, count = 0; i < top; ) > + for ( i = 0; i < e820.nr_map; i++ ) > { > - unsigned long pfn = pdx_to_pfn(i); > - unsigned int perms = hwdom_iommu_map(d, pfn, max_pfn); > + struct e820entry entry = e820.map[i]; > > - if ( !perms ) > - /* nothing */; > - else if ( paging_mode_translate(d) ) > + switch ( entry.type ) > { > - int rc; > + case E820_UNUSABLE: > + if ( !iommu_hwdom_inclusive || PFN_DOWN(entry.addr) > max_pfn ) > + continue; It's > here, but ... > - rc = p2m_add_identity_entry(d, pfn, > - perms & IOMMUF_writable ? > p2m_access_rw > - : > p2m_access_r, > - 0); > + rc = rangeset_remove_range(map, PFN_DOWN(entry.addr), > + PFN_DOWN(entry.addr + entry.size - > 1)); > if ( rc ) > - printk(XENLOG_WARNING > - "%pd: identity mapping of %lx failed: %d\n", > - d, pfn, rc); > + panic("IOMMU failed to remove unusable memory: %d\n", > + rc); > + continue; > + > + case E820_RESERVED: > + if ( !iommu_hwdom_inclusive && !iommu_hwdom_reserved ) > + continue; > + break; > + > + case E820_RAM: > + if ( iommu_hwdom_strict ) > + continue; > + break; > + > + default: > + if ( !iommu_hwdom_inclusive || PFN_DOWN(entry.addr) >= max_pfn ) > + continue; ... >= here? > + entry.size = pfn_to_paddr(max_pfn) - 1 - entry.addr; Why the subtraction of 1 when you're calculating size? Don't you actually need to add 1 to max_pfn before converting to paddr? While overall things look plausible elsewhere, I'm hoping that - as asked for by Andrew - it'll be possible to split this some, to make it a little more reasonable to actually look at the changes done. Jan