> On Nov 19, 2021, at 2:42 PM, Alex Williamson <alex.william...@redhat.com> > wrote: > > On Mon, 8 Nov 2021 16:46:44 -0800 > John Johnson <john.g.john...@oracle.com> wrote: > >> Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com> >> Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com> >> Signed-off-by: John G Johnson <john.g.john...@oracle.com> >> --- >> hw/vfio/pci.h | 1 + >> hw/vfio/user-protocol.h | 32 +++++++ >> hw/vfio/user.h | 1 + >> include/hw/vfio/vfio-common.h | 4 + >> hw/vfio/common.c | 76 +++++++++++++--- >> hw/vfio/pci.c | 4 + >> hw/vfio/user.c | 206 >> ++++++++++++++++++++++++++++++++++++++++++ >> 7 files changed, 309 insertions(+), 15 deletions(-) >> >> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h >> index 643ff75..156fee2 100644 >> --- a/hw/vfio/pci.h >> +++ b/hw/vfio/pci.h >> @@ -193,6 +193,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, >> VFIO_USER_PCI) >> struct VFIOUserPCIDevice { >> VFIOPCIDevice device; >> char *sock_name; >> + bool secure_dma; /* disable shared mem for DMA */ > > ???????????? It's there, it's gone, it's back. >
This was a merge mistake >> >> >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h >> index c0e7632..dcfae2c 100644 >> --- a/include/hw/vfio/vfio-common.h >> +++ b/include/hw/vfio/vfio-common.h >> @@ -90,6 +90,8 @@ typedef struct VFIOContainer { >> VFIOContIO *io_ops; >> bool initialized; >> bool dirty_pages_supported; >> + bool will_commit; > > The entire will_commit concept hidden in the map and unmap operations > from many patches ago should be introduced here, or later. > ok >> + bool need_map_fd; >> uint64_t dirty_pgsizes; >> uint64_t max_dirty_bitmap_size; >> unsigned long pgsizes; >> @@ -210,6 +212,7 @@ struct VFIOContIO { >> int (*dirty_bitmap)(VFIOContainer *container, >> struct vfio_iommu_type1_dirty_bitmap *bitmap, >> struct vfio_iommu_type1_dirty_bitmap_get *range); >> + void (*wait_commit)(VFIOContainer *container); >> }; >> >> #define CONT_DMA_MAP(cont, map, fd, will_commit) \ >> @@ -218,6 +221,7 @@ struct VFIOContIO { >> ((cont)->io_ops->dma_unmap((cont), (unmap), (bitmap), (will_commit))) >> #define CONT_DIRTY_BITMAP(cont, bitmap, range) \ >> ((cont)->io_ops->dirty_bitmap((cont), (bitmap), (range))) >> +#define CONT_WAIT_COMMIT(cont) ((cont)->io_ops->wait_commit(cont)) >> >> extern VFIODevIO vfio_dev_io_ioctl; >> extern VFIOContIO vfio_cont_io_ioctl; >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c >> index fdd2702..0840c8f 100644 >> --- a/hw/vfio/common.c >> +++ b/hw/vfio/common.c >> @@ -411,6 +411,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer >> *container, >> struct vfio_iommu_type1_dma_unmap *unmap; >> struct vfio_bitmap *bitmap; >> uint64_t pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size; >> + bool will_commit = container->will_commit; >> int ret; >> >> unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap)); >> @@ -444,7 +445,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer >> *container, >> goto unmap_exit; >> } >> >> - ret = CONT_DMA_UNMAP(container, unmap, bitmap, false); >> + ret = CONT_DMA_UNMAP(container, unmap, bitmap, will_commit); >> if (!ret) { >> cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data, >> iotlb->translated_addr, pages); >> @@ -471,16 +472,17 @@ static int vfio_dma_unmap(VFIOContainer *container, >> .iova = iova, >> .size = size, >> }; >> + bool will_commit = container->will_commit; >> >> if (iotlb && container->dirty_pages_supported && >> vfio_devices_all_running_and_saving(container)) { >> return vfio_dma_unmap_bitmap(container, iova, size, iotlb); >> } >> >> - return CONT_DMA_UNMAP(container, &unmap, NULL, false); >> + return CONT_DMA_UNMAP(container, &unmap, NULL, will_commit); > > We're passing the container, why do we need a separate will_commit arg > for these? > ok > >> } >> >> -static int vfio_dma_map(VFIOContainer *container, hwaddr iova, >> +static int vfio_dma_map(VFIOContainer *container, MemoryRegion *mr, hwaddr >> iova, >> ram_addr_t size, void *vaddr, bool readonly) >> { >> struct vfio_iommu_type1_dma_map map = { >> @@ -490,13 +492,23 @@ static int vfio_dma_map(VFIOContainer *container, >> hwaddr iova, >> .iova = iova, >> .size = size, >> }; >> - int ret; >> + int fd, ret; >> + bool will_commit = container->will_commit; >> >> if (!readonly) { >> map.flags |= VFIO_DMA_MAP_FLAG_WRITE; >> } >> >> - ret = CONT_DMA_MAP(container, &map, -1, false); >> + if (container->need_map_fd) { >> + fd = memory_region_get_fd(mr); >> + if (fd != -1) { >> + map.vaddr = qemu_ram_block_host_offset(mr->ram_block, vaddr); >> + } >> + } else { >> + fd = -1; >> + } >> + >> + ret = CONT_DMA_MAP(container, &map, fd, will_commit); > > Why were we even passing a -1 fd previously? Would it make more sense > to pass the mr and put this in the user variant .map_dma? We're going > to the trouble to pass the mr down this far here. If the map callback > handled the above fd and map.vaddr we could also avoid of the > need_map_fd flag on the container. > ok >> >> >> +static void vfio_listener_begin(MemoryListener *listener) >> +{ >> + VFIOContainer *container = container_of(listener, VFIOContainer, >> listener); >> + >> + /* cannot drop BQL during the transaction, send maps/demaps async */ >> + container->will_commit = true; >> +} >> + >> +static void vfio_listener_commit(MemoryListener *listener) >> +{ >> + VFIOContainer *container = container_of(listener, VFIOContainer, >> listener); >> + >> + /* wait for any async requests sent during the transaction */ >> + CONT_WAIT_COMMIT(container); >> + container->will_commit = false; >> +} > > Not sure I follow the semantics, when would the map/unmap callbacks get > called when will_commit is false? > If map/unmap is called outside a transaction, it can drop BQL and wait. For unmap/map inside a transaction, they're sent async, and waited for after the commit (when it’s safe to drop BQL while we wait) > Does it really make sense to have macros for ops that we call in one > place? > I did it this way so all container ops calls are done the same way. >> \ >> static const MemoryListener vfio_memory_listener = { >> + .begin = vfio_listener_begin, >> + .commit = vfio_listener_commit, >> .region_add = vfio_listener_region_add, >> .region_del = vfio_listener_region_del, >> .log_global_start = vfio_listener_log_global_start, >> @@ -1561,6 +1598,7 @@ int vfio_region_setup(Object *obj, VFIODevice >> *vbasedev, VFIORegion *region, >> region->size = info->size; >> region->fd_offset = info->offset; >> region->nr = index; >> + region->post_wr = false; > > Should this be in a different patch? It looks unrelated. > I think you said as much in the patch that introduced the region write ops call. >> region->remfd = vfio_get_region_info_remfd(vbasedev, index); >> >> if (region->size) { >> @@ -2047,6 +2085,7 @@ static int vfio_connect_container(VFIOGroup *group, >> AddressSpace *as, >> container->dirty_pages_supported = false; >> container->dma_max_mappings = 0; >> container->io_ops = &vfio_cont_io_ioctl; >> + container->need_map_fd = false; >> QLIST_INIT(&container->giommu_list); >> QLIST_INIT(&container->hostwin_list); >> QLIST_INIT(&container->vrdl_list); >> @@ -2230,6 +2269,7 @@ void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup >> *group, AddressSpace *as) >> container->space = space; >> container->fd = -1; >> container->io_ops = &vfio_cont_io_sock; >> + container->need_map_fd = (proxy->flags & VFIO_PROXY_SECURE) == 0; >> QLIST_INIT(&container->giommu_list); >> QLIST_INIT(&container->hostwin_list); >> container->proxy = proxy; >> @@ -2879,8 +2919,14 @@ static int vfio_io_dirty_bitmap(VFIOContainer >> *container, >> return ret; >> } >> >> +static void vfio_io_wait_commit(VFIOContainer *container) >> +{ >> + /* ioctl()s are synchronous */ >> +} >> + > > Maybe these should just be "dma_commit" rather than "wait_commit"? I'd > also tend to suggest "async" rather than "will_commit". > ok >> VFIOContIO vfio_cont_io_ioctl = { >> .dma_map = vfio_io_dma_map, >> .dma_unmap = vfio_io_dma_unmap, >> .dirty_bitmap = vfio_io_dirty_bitmap, >> + .wait_commit = vfio_io_wait_commit, >> }; >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c >> index d657b01..ca821da 100644 >> --- a/hw/vfio/pci.c >> +++ b/hw/vfio/pci.c >> @@ -3516,6 +3516,9 @@ static void vfio_user_pci_realize(PCIDevice *pdev, >> Error **errp) >> vbasedev->proxy = proxy; >> vfio_user_set_handler(vbasedev, vfio_user_pci_process_req, vdev); >> >> + if (udev->secure_dma) { >> + proxy->flags |= VFIO_PROXY_SECURE; >> + } >> if (udev->send_queued) { >> proxy->flags |= VFIO_PROXY_FORCE_QUEUED; >> } >> @@ -3686,6 +3689,7 @@ static void vfio_user_instance_finalize(Object *obj) >> >> static Property vfio_user_pci_dev_properties[] = { >> DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name), >> + DEFINE_PROP_BOOL("secure-dma", VFIOUserPCIDevice, secure_dma, false), > > "secure_dma" looks entirely compartmentalized that it could be a > separate patch. Thanks, > ok JJ