On Mon, Aug 19, 2024 at 06:54:54AM -0700, Mattias Nissler wrote: > When DMA memory can't be directly accessed, as is the case when > running the device model in a separate process without shareable DMA > file descriptors, bounce buffering is used. > > It is not uncommon for device models to request mapping of several DMA > regions at the same time. Examples include: > * net devices, e.g. when transmitting a packet that is split across > several TX descriptors (observed with igb) > * USB host controllers, when handling a packet with multiple data TRBs > (observed with xhci) > > Previously, qemu only provided a single bounce buffer per AddressSpace > and would fail DMA map requests while the buffer was already in use. In > turn, this would cause DMA failures that ultimately manifest as hardware > errors from the guest perspective. > > This change allocates DMA bounce buffers dynamically instead of > supporting only a single buffer. Thus, multiple DMA mappings work > correctly also when RAM can't be mmap()-ed. > > The total bounce buffer allocation size is limited individually for each > AddressSpace. The default limit is 4096 bytes, matching the previous > maximum buffer size. A new x-max-bounce-buffer-size parameter is > provided to configure the limit for PCI devices. > > Signed-off-by: Mattias Nissler <mniss...@rivosinc.com> > Reviewed-by: Philippe Mathieu-Daudé <phi...@linaro.org> > Acked-by: Peter Xu <pet...@redhat.com> > --- > This patch is split out from my "Support message-based DMA in vfio-user > server" > series. With the series having been partially applied, I'm splitting this one > out as the only remaining patch to system emulation code in the hope to > simplify getting it landed. The code has previously been reviewed by Stefan > Hajnoczi and Peter Xu. This latest version includes changes to switch the > bounce buffer size bookkeeping to `size_t` as requested and LGTM'd by Phil in > v9. > --- > hw/pci/pci.c | 8 ++++ > include/exec/memory.h | 14 +++---- > include/hw/pci/pci_device.h | 3 ++ > system/memory.c | 5 ++- > system/physmem.c | 82 ++++++++++++++++++++++++++----------- > 5 files changed, 76 insertions(+), 36 deletions(-) > > diff --git a/hw/pci/pci.c b/hw/pci/pci.c > index fab86d0567..d2caf3ee8b 100644 > --- a/hw/pci/pci.c > +++ b/hw/pci/pci.c > @@ -85,6 +85,8 @@ static Property pci_props[] = { > QEMU_PCIE_ERR_UNC_MASK_BITNR, true), > DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present, > QEMU_PCIE_ARI_NEXTFN_1_BITNR, false), > + DEFINE_PROP_SIZE32("x-max-bounce-buffer-size", PCIDevice, > + max_bounce_buffer_size, DEFAULT_MAX_BOUNCE_BUFFER_SIZE), > DEFINE_PROP_END_OF_LIST() > }; >
I'm a bit puzzled by now there being two fields named max_bounce_buffer_size, one directly controllable by a property. Pls add code comments explaining how they are related. Also, what is the point of adding a property without making it part of an API? No one will be able to rely on it working. > @@ -1204,6 +1206,8 @@ static PCIDevice *do_pci_register_device(PCIDevice > *pci_dev, > "bus master container", UINT64_MAX); > address_space_init(&pci_dev->bus_master_as, > &pci_dev->bus_master_container_region, pci_dev->name); > + pci_dev->bus_master_as.max_bounce_buffer_size = > + pci_dev->max_bounce_buffer_size; > > if (phase_check(PHASE_MACHINE_READY)) { > pci_init_bus_master(pci_dev); > @@ -2633,6 +2637,10 @@ static void pci_device_class_init(ObjectClass *klass, > void *data) > k->unrealize = pci_qdev_unrealize; > k->bus_type = TYPE_PCI_BUS; > device_class_set_props(k, pci_props); > + object_class_property_set_description( > + klass, "x-max-bounce-buffer-size", > + "Maximum buffer size allocated for bounce buffers used for mapped " > + "access to indirect DMA memory"); > } > > static void pci_device_class_base_init(ObjectClass *klass, void *data) > diff --git a/include/exec/memory.h b/include/exec/memory.h > index 296fd068c0..e5e865d1a9 100644 > --- a/include/exec/memory.h > +++ b/include/exec/memory.h > @@ -1084,13 +1084,7 @@ typedef struct AddressSpaceMapClient { > QLIST_ENTRY(AddressSpaceMapClient) link; > } AddressSpaceMapClient; > > -typedef struct { > - MemoryRegion *mr; > - void *buffer; > - hwaddr addr; > - hwaddr len; > - bool in_use; > -} BounceBuffer; > +#define DEFAULT_MAX_BOUNCE_BUFFER_SIZE (4096) > > /** > * struct AddressSpace: describes a mapping of addresses to #MemoryRegion > objects > @@ -1110,8 +1104,10 @@ struct AddressSpace { > QTAILQ_HEAD(, MemoryListener) listeners; > QTAILQ_ENTRY(AddressSpace) address_spaces_link; > > - /* Bounce buffer to use for this address space. */ > - BounceBuffer bounce; > + /* Maximum DMA bounce buffer size used for indirect memory map requests > */ > + size_t max_bounce_buffer_size; > + /* Total size of bounce buffers currently allocated, atomically accessed > */ > + size_t bounce_buffer_size; > /* List of callbacks to invoke when buffers free up */ > QemuMutex map_client_list_lock; > QLIST_HEAD(, AddressSpaceMapClient) map_client_list; > diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h > index 15694f2489..91df40f989 100644 > --- a/include/hw/pci/pci_device.h > +++ b/include/hw/pci/pci_device.h > @@ -167,6 +167,9 @@ struct PCIDevice { > /* ID of standby device in net_failover pair */ > char *failover_pair_id; > uint32_t acpi_index; > + > + /* Maximum DMA bounce buffer size used for indirect memory map requests > */ > + uint32_t max_bounce_buffer_size; > }; > > static inline int pci_intx(PCIDevice *pci_dev) > diff --git a/system/memory.c b/system/memory.c > index 5e6eb459d5..f6f6fee6d8 100644 > --- a/system/memory.c > +++ b/system/memory.c > @@ -3148,7 +3148,8 @@ void address_space_init(AddressSpace *as, MemoryRegion > *root, const char *name) > as->ioeventfds = NULL; > QTAILQ_INIT(&as->listeners); > QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link); > - as->bounce.in_use = false; > + as->max_bounce_buffer_size = DEFAULT_MAX_BOUNCE_BUFFER_SIZE; > + as->bounce_buffer_size = 0; > qemu_mutex_init(&as->map_client_list_lock); > QLIST_INIT(&as->map_client_list); > as->name = g_strdup(name ? name : "anonymous"); > @@ -3158,7 +3159,7 @@ void address_space_init(AddressSpace *as, MemoryRegion > *root, const char *name) > > static void do_address_space_destroy(AddressSpace *as) > { > - assert(!qatomic_read(&as->bounce.in_use)); > + assert(qatomic_read(&as->bounce_buffer_size) == 0); > assert(QLIST_EMPTY(&as->map_client_list)); > qemu_mutex_destroy(&as->map_client_list_lock); > > diff --git a/system/physmem.c b/system/physmem.c > index 94600a33ec..971bfa0855 100644 > --- a/system/physmem.c > +++ b/system/physmem.c > @@ -3095,6 +3095,20 @@ void cpu_flush_icache_range(hwaddr start, hwaddr len) > NULL, len, FLUSH_CACHE); > } > > +/* > + * A magic value stored in the first 8 bytes of the bounce buffer struct. > Used > + * to detect illegal pointers passed to address_space_unmap. > + */ > +#define BOUNCE_BUFFER_MAGIC 0xb4017ceb4ffe12ed > + > +typedef struct { > + uint64_t magic; > + MemoryRegion *mr; > + hwaddr addr; > + size_t len; > + uint8_t buffer[]; > +} BounceBuffer; > + > static void > address_space_unregister_map_client_do(AddressSpaceMapClient *client) > { > @@ -3120,9 +3134,9 @@ void address_space_register_map_client(AddressSpace > *as, QEMUBH *bh) > QEMU_LOCK_GUARD(&as->map_client_list_lock); > client->bh = bh; > QLIST_INSERT_HEAD(&as->map_client_list, client, link); > - /* Write map_client_list before reading in_use. */ > + /* Write map_client_list before reading bounce_buffer_size. */ > smp_mb(); > - if (!qatomic_read(&as->bounce.in_use)) { > + if (qatomic_read(&as->bounce_buffer_size) < as->max_bounce_buffer_size) { > address_space_notify_map_clients_locked(as); > } > } > @@ -3251,28 +3265,40 @@ void *address_space_map(AddressSpace *as, > mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs); > > if (!memory_access_is_direct(mr, is_write)) { > - if (qatomic_xchg(&as->bounce.in_use, true)) { > + size_t used = qatomic_read(&as->bounce_buffer_size); > + for (;;) { > + hwaddr alloc = MIN(as->max_bounce_buffer_size - used, l); > + size_t new_size = used + alloc; > + size_t actual = > + qatomic_cmpxchg(&as->bounce_buffer_size, used, new_size); > + if (actual == used) { > + l = alloc; > + break; > + } > + used = actual; > + } > + > + if (l == 0) { > *plen = 0; > return NULL; > } > - /* Avoid unbounded allocations */ > - l = MIN(l, TARGET_PAGE_SIZE); > - as->bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l); > - as->bounce.addr = addr; > - as->bounce.len = l; > > + BounceBuffer *bounce = g_malloc0(l + sizeof(BounceBuffer)); > + bounce->magic = BOUNCE_BUFFER_MAGIC; > memory_region_ref(mr); > - as->bounce.mr = mr; > + bounce->mr = mr; > + bounce->addr = addr; > + bounce->len = l; > + > if (!is_write) { > flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED, > - as->bounce.buffer, l); > + bounce->buffer, l); > } > > *plen = l; > - return as->bounce.buffer; > + return bounce->buffer; > } > > - > memory_region_ref(mr); > *plen = flatview_extend_translation(fv, addr, len, mr, xlat, > l, is_write, attrs); > @@ -3287,12 +3313,11 @@ void *address_space_map(AddressSpace *as, > void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, > bool is_write, hwaddr access_len) > { > - if (buffer != as->bounce.buffer) { > - MemoryRegion *mr; > - ram_addr_t addr1; > + MemoryRegion *mr; > + ram_addr_t addr1; > > - mr = memory_region_from_host(buffer, &addr1); > - assert(mr != NULL); > + mr = memory_region_from_host(buffer, &addr1); > + if (mr != NULL) { > if (is_write) { > invalidate_and_set_dirty(mr, addr1, access_len); > } > @@ -3302,15 +3327,22 @@ void address_space_unmap(AddressSpace *as, void > *buffer, hwaddr len, > memory_region_unref(mr); > return; > } > + > + > + BounceBuffer *bounce = container_of(buffer, BounceBuffer, buffer); > + assert(bounce->magic == BOUNCE_BUFFER_MAGIC); > + > if (is_write) { > - address_space_write(as, as->bounce.addr, MEMTXATTRS_UNSPECIFIED, > - as->bounce.buffer, access_len); > - } > - qemu_vfree(as->bounce.buffer); > - as->bounce.buffer = NULL; > - memory_region_unref(as->bounce.mr); > - /* Clear in_use before reading map_client_list. */ > - qatomic_set_mb(&as->bounce.in_use, false); > + address_space_write(as, bounce->addr, MEMTXATTRS_UNSPECIFIED, > + bounce->buffer, access_len); > + } > + > + qatomic_sub(&as->bounce_buffer_size, bounce->len); > + bounce->magic = ~BOUNCE_BUFFER_MAGIC; > + memory_region_unref(bounce->mr); > + g_free(bounce); > + /* Write bounce_buffer_size before reading map_client_list. */ > + smp_mb(); > address_space_notify_map_clients(as); > } > > -- > 2.34.1