On Thu, Jan 29, 2015 at 08:27:20PM +1100, Alexey Kardashevskiy wrote: > This makes use of the new "memory registering" feature. The idea is > to provide the guest ability to notify the host kernel about pages which > are going to be used for DMA. Having this information, the host kernel > can pin them all, do locked pages accounting and not spent time on > doing that in real time with possible failures which cannot be handled > nicely in some cases. > > This adds a memory listener which notifies a VFIO container about memory > which needs to be pinned/unpinned. VFIO MMIO regions are skipped. > > Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> > --- > hw/vfio/common.c | 99 > ++++++++++++++++++++++++++++++++++++++++++- > include/hw/vfio/vfio-common.h | 3 +- > 2 files changed, 100 insertions(+), 2 deletions(-) > > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index cf483ff..c08f9ab 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -485,6 +485,99 @@ void vfio_listener_release(VFIOContainer *container) > memory_listener_unregister(&container->iommu_data.type1.listener); > } > > +static int vfio_mem_register(VFIOContainer *container, > + void *vaddr, ram_addr_t size) > +{ > + struct vfio_iommu_type1_register_memory reg = { > + .argsz = sizeof(reg), > + .vaddr = (__u64)(uintptr_t)vaddr, > + .size = size, > + }; > + long ret = ioctl(container->fd, VFIO_IOMMU_REGISTER_MEMORY, ®); > + > + DPRINTF("(%u) %s %u: va=%lx size=%lx, ret=%ld\n", getpid(), > + __func__, __LINE__, reg.vaddr, reg.size, ret); > + > + return ret; > +} > + > +static int vfio_mem_unregister(VFIOContainer *container, > + void *vaddr, ram_addr_t size) > +{ > + struct vfio_iommu_type1_unregister_memory reg = { > + .argsz = sizeof(reg), > + .vaddr = (__u64)(uintptr_t)vaddr, > + .size = size, > + }; > + long ret = ioctl(container->fd, VFIO_IOMMU_UNREGISTER_MEMORY, ®); > + > + DPRINTF("(%u) %s %u: va=%lx size=%lx, ret=%ld\n", getpid(), > + __func__, __LINE__, reg.vaddr, reg.size, ret); > + > + return ret; > +} > + > +static void vfio_ram_region_add(MemoryListener *listener, > + MemoryRegionSection *section) > +{ > + VFIOContainer *container = container_of(listener, VFIOContainer, > + iommu_data.type1.ramlistener); > + hwaddr end; > + Int128 llend; > + void *vaddr; > + > + if (!memory_region_is_ram(section->mr) || > + memory_region_is_skip_dump(section->mr)) { > + return; > + } > + > + llend = int128_make64(section->offset_within_address_space); > + llend = int128_add(llend, section->size);
Does this need an add TARGET_PAGE_SIZE-1 in order to make it round up to a page boundary, rather than truncate to a page boundary? > + llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); > + > + memory_region_ref(section->mr); > + > + end = int128_get64(llend); > + vaddr = memory_region_get_ram_ptr(section->mr) + > + section->offset_within_region; > + vfio_mem_register(container, vaddr, end); > +} > + > +static void vfio_ram_region_del(MemoryListener *listener, > + MemoryRegionSection *section) > +{ > + VFIOContainer *container = container_of(listener, VFIOContainer, > + iommu_data.type1.ramlistener); > + hwaddr iova, end; > + void *vaddr; > + > + if (!memory_region_is_ram(section->mr) || > + memory_region_is_skip_dump(section->mr)) { > + return; > + } > + > + > + iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); > + end = (section->offset_within_address_space + > int128_get64(section->size)) & > + TARGET_PAGE_MASK; > + vaddr = memory_region_get_ram_ptr(section->mr) + > + section->offset_within_region + > + (iova - section->offset_within_address_space); It's not clear to me why region_add and region_del have a different set of address calculations. > + vfio_mem_unregister(container, vaddr, end - iova); > +} > + > +const MemoryListener vfio_ram_listener = { > + .region_add = vfio_ram_region_add, > + .region_del = vfio_ram_region_del, > +}; > + > +static void vfio_spapr_listener_release(VFIOContainer *container) > +{ > + memory_listener_unregister(&container->iommu_data.type1.listener); > + memory_listener_unregister(&container->iommu_data.type1.ramlistener); Accessing fields within type1 from a function whose name says it's spapr specific seems very wrong. > +} > + > int vfio_mmap_region(Object *obj, VFIORegion *region, > MemoryRegion *mem, MemoryRegion *submem, > void **map, size_t size, off_t offset, > @@ -705,6 +798,10 @@ static int vfio_connect_container(VFIOGroup *group, > AddressSpace *as) > goto free_container_exit; > } > > + container->iommu_data.type1.ramlistener = vfio_ram_listener; > + memory_listener_register(&container->iommu_data.type1.ramlistener, > + &address_space_memory); Why two separate listeners, rather than doing both jobs from a single listener? > /* > * The host kernel code implementing VFIO_IOMMU_DISABLE is called > * when container fd is closed so we do not call it explicitly > @@ -718,7 +815,7 @@ static int vfio_connect_container(VFIOGroup *group, > AddressSpace *as) > } > > container->iommu_data.type1.listener = vfio_memory_listener; > - container->iommu_data.release = vfio_listener_release; > + container->iommu_data.release = vfio_spapr_listener_release; > > memory_listener_register(&container->iommu_data.type1.listener, > container->space->as); > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index 1d5bfe8..3f91216 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -66,6 +66,7 @@ struct VFIOGroup; > > typedef struct VFIOType1 { > MemoryListener listener; > + MemoryListener ramlistener; > int error; > bool initialized; > } VFIOType1; > @@ -144,7 +145,7 @@ int vfio_get_device(VFIOGroup *group, const char *name, > VFIODevice *vbasedev); > > extern const MemoryRegionOps vfio_region_ops; > -extern const MemoryListener vfio_memory_listener; > +extern const MemoryListener vfio_memory_listener, vfio_ram_listener; > extern QLIST_HEAD(vfio_group_head, VFIOGroup) vfio_group_list; > extern QLIST_HEAD(vfio_as_head, VFIOAddressSpace) vfio_address_spaces; > -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
pgpg5qNTs_4KR.pgp
Description: PGP signature