-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 On 02/02/2015 06:04 PM, David Gibson wrote: > On Thu, Jan 29, 2015 at 08:27:20PM +1100, Alexey Kardashevskiy wrote: >> This makes use of the new "memory registering" feature. The idea is >> to provide the guest ability to notify the host kernel about pages >> which are going to be used for DMA. Having this information, the >> host kernel can pin them all, do locked pages accounting and not >> spent time on doing that in real time with possible failures which >> cannot be handled nicely in some cases. >> >> This adds a memory listener which notifies a VFIO container about >> memory which needs to be pinned/unpinned. VFIO MMIO regions are >> skipped. >> >> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> --- >> hw/vfio/common.c | 99 >> ++++++++++++++++++++++++++++++++++++++++++- >> include/hw/vfio/vfio-common.h | 3 +- 2 files changed, 100 >> insertions(+), 2 deletions(-) >> >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c index >> cf483ff..c08f9ab 100644 --- a/hw/vfio/common.c +++ >> b/hw/vfio/common.c @@ -485,6 +485,99 @@ void >> vfio_listener_release(VFIOContainer *container) >> memory_listener_unregister(&container->iommu_data.type1.listener); >> } >> >> +static int vfio_mem_register(VFIOContainer *container, + >> void *vaddr, ram_addr_t size) +{ + struct >> vfio_iommu_type1_register_memory reg = { + .argsz = >> sizeof(reg), + .vaddr = (__u64)(uintptr_t)vaddr, + >> .size = size, + }; + long ret = ioctl(container->fd, >> VFIO_IOMMU_REGISTER_MEMORY, ®); + + DPRINTF("(%u) %s %u: >> va=%lx size=%lx, ret=%ld\n", getpid(), + __func__, >> __LINE__, reg.vaddr, reg.size, ret); + + return ret; +} + +static >> int vfio_mem_unregister(VFIOContainer *container, + >> void *vaddr, ram_addr_t size) +{ + struct >> vfio_iommu_type1_unregister_memory reg = { + .argsz = >> sizeof(reg), + .vaddr = (__u64)(uintptr_t)vaddr, + >> .size = size, + }; + long ret = ioctl(container->fd, >> VFIO_IOMMU_UNREGISTER_MEMORY, ®); + + DPRINTF("(%u) %s %u: >> va=%lx size=%lx, ret=%ld\n", getpid(), + __func__, >> __LINE__, reg.vaddr, reg.size, ret); + + return ret; +} + +static >> void vfio_ram_region_add(MemoryListener *listener, + >> MemoryRegionSection *section) +{ + VFIOContainer *container = >> container_of(listener, VFIOContainer, + >> iommu_data.type1.ramlistener); + hwaddr end; + Int128 llend; + >> void *vaddr; + + if (!memory_region_is_ram(section->mr) || + >> memory_region_is_skip_dump(section->mr)) { + return; + } >> + + llend = int128_make64(section->offset_within_address_space); >> + llend = int128_add(llend, section->size); > > Does this need an add TARGET_PAGE_SIZE-1 in order to make it round up > to a page boundary, rather than truncate to a page boundary?
Fixed. >> + llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); + + >> memory_region_ref(section->mr); + + end = int128_get64(llend); + >> vaddr = memory_region_get_ram_ptr(section->mr) + + >> section->offset_within_region; + vfio_mem_register(container, >> vaddr, end); +} + +static void vfio_ram_region_del(MemoryListener >> *listener, + MemoryRegionSection >> *section) +{ + VFIOContainer *container = container_of(listener, >> VFIOContainer, + >> iommu_data.type1.ramlistener); + hwaddr iova, end; + void >> *vaddr; + + if (!memory_region_is_ram(section->mr) || + >> memory_region_is_skip_dump(section->mr)) { + return; + } >> + + + iova = >> TARGET_PAGE_ALIGN(section->offset_within_address_space); + end = >> (section->offset_within_address_space + int128_get64(section->size)) >> & + TARGET_PAGE_MASK; + vaddr = >> memory_region_get_ram_ptr(section->mr) + + >> section->offset_within_region + + (iova - >> section->offset_within_address_space); > > It's not clear to me why region_add and region_del have a different > set of address calculations. Cut-n-paste from the other listener :) I will rework it. >> + vfio_mem_unregister(container, vaddr, end - iova); +} + +const >> MemoryListener vfio_ram_listener = { + .region_add = >> vfio_ram_region_add, + .region_del = vfio_ram_region_del, +}; + >> +static void vfio_spapr_listener_release(VFIOContainer *container) >> +{ + >> memory_listener_unregister(&container->iommu_data.type1.listener); + >> memory_listener_unregister(&container->iommu_data.type1.ramlistener); > >> > Accessing fields within type1 from a function whose name says it's > spapr specific seems very wrong. Kind of ugly, yes. But we share the common memory listener with Type1 so... >> +} + int vfio_mmap_region(Object *obj, VFIORegion *region, >> MemoryRegion *mem, MemoryRegion *submem, void **map, size_t size, >> off_t offset, @@ -705,6 +798,10 @@ static int >> vfio_connect_container(VFIOGroup *group, AddressSpace *as) goto >> free_container_exit; } >> >> + container->iommu_data.type1.ramlistener = >> vfio_ram_listener; + >> memory_listener_register(&container->iommu_data.type1.ramlistener, + >> &address_space_memory); > > Why two separate listeners, rather than doing both jobs from a single > listener? ... I actually like the idea to have this separated from the rest of the code. Furthermore, now I think we better have separate memory listeners for Type1 and SPAPR as the current vfio_listener_region_add()/del() look quite ugly trying to do different things depending on memory_region_is_iommu(). Any objection to separating SPAPR's listener (and merging it with the one introduced by this patch)? >> /* * The host kernel code implementing VFIO_IOMMU_DISABLE is called >> * when container fd is closed so we do not call it explicitly @@ >> -718,7 +815,7 @@ static int vfio_connect_container(VFIOGroup *group, >> AddressSpace *as) } >> >> container->iommu_data.type1.listener = vfio_memory_listener; - >> container->iommu_data.release = vfio_listener_release; + >> container->iommu_data.release = vfio_spapr_listener_release; >> >> memory_listener_register(&container->iommu_data.type1.listener, >> container->space->as); diff --git a/include/hw/vfio/vfio-common.h >> b/include/hw/vfio/vfio-common.h index 1d5bfe8..3f91216 100644 --- >> a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h >> @@ -66,6 +66,7 @@ struct VFIOGroup; >> >> typedef struct VFIOType1 { MemoryListener listener; + >> MemoryListener ramlistener; int error; bool initialized; } >> VFIOType1; @@ -144,7 +145,7 @@ int vfio_get_device(VFIOGroup *group, >> const char *name, VFIODevice *vbasedev); >> >> extern const MemoryRegionOps vfio_region_ops; -extern const >> MemoryListener vfio_memory_listener; +extern const MemoryListener >> vfio_memory_listener, vfio_ram_listener; extern >> QLIST_HEAD(vfio_group_head, VFIOGroup) vfio_group_list; extern >> QLIST_HEAD(vfio_as_head, VFIOAddressSpace) vfio_address_spaces; >> > - -- Alexey -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAEBAgAGBQJU4qQJAAoJEIYTPdgrwSC51g0P/iNSwMhniXDP/OIwUNA/fN+l WWVOpNT9aMgkJM7nttgUITc8SGvkqVsF50KsB8E6o6/tcYy4eKWFwZhRHCDWhAAg 6n6iJtSfGW7jQ2HEkTFB60RCEXhDsHDyuG0Q9BHkk/7EAOFqTPNXncv14Ant81GB rDRtT5vQsvmr6nT8Qw7Yzr1ip1kVl60YVSjxGjD2D3xU7TLIqNkIGbBgSKErcGh5 19hGsN7GcTTmteFrUmKpmeRt/yRazmHuzV5eMVuwnO6rDX4TCbVZpLUrmsDd3K2A ePr5oCtWZl45b9MwZT1DLkgN1SjYvH5NgbeYBPOSO5BeGu0TcedULNw++ptpfm+v G7P5k4TLWze9PM/vVh1e6QZp/pe2eHkFOBqr+I35T8eMgCXkETbLPwHVpdsybrua 2r1GwayCopk6uikcHWsBGuQ3+1QNNUBwBm1h9MYBLjB+dC9tfQqCkK0X8fbmhpSG 0doaM36RZdiPc0iP5qyih80F7MnNRUuKPwa2k0z6fEgpnkT4JqIDHFX7CrznqYrT BmfYSsX3a45YnxTsYLtPRmb2EQDojQ66yiCu70sMXFk+j+lojnpov/zal/4K67Ws uliuSxm/tNNc17sppRHhTfHsFwOkJ92KNwfqxN/6o6LOtv5GFVsR0tdiBAOnZYfP OZH2mTBVfccG3KgsnelV =x4bG -----END PGP SIGNATURE-----