With the introduction of dataplane, the global bounce buffer is neither thread-safe nor multi-thread friendly. The problems are:
1) Access to "bounce" is not atomic, thus not safe from dataplane thread. 2) Access to "map_client_list" is not atomic, thus not safe from dataplane thread. 3) In dma_blk_cb, there is a race condition between: mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir); /* ... */ cpu_register_map_client(dbs, continue_after_map_failure); Bounce may become available after dma_memory_map failed but before cpu_register_map_client is called. 4) The callback registered by cpu_register_map_client is not called in the right AioContext. This patch changes the global bounce to a global set, which is protected by a mutex. This makes the dma_memory_map never returns NULL in the bounce buffer path. The dma helper behavior is changed, so update the ide dma test case too. The test used to trigger a big number of consecutive dma read (as a result of disabling "busmaster"), which will put the IDE device to "intr | active" status. After this patch, the dma read is merged to a very big SG (niov=8192), which results in -EINVAL in the final preadv(2). In this case, ide ERR bit is set. Signed-off-by: Fam Zheng <f...@redhat.com> --- exec.c | 77 +++++++++++++++++++++++++++++++++++++++----------------- tests/ide-test.c | 7 ++++-- 2 files changed, 59 insertions(+), 25 deletions(-) diff --git a/exec.c b/exec.c index 6a5adab..5943cc2 100644 --- a/exec.c +++ b/exec.c @@ -94,6 +94,42 @@ DEFINE_TLS(CPUState *, current_cpu); 2 = Adaptive rate instruction counting. */ int use_icount; +typedef struct BounceBuffer { + MemoryRegion *mr; + void *buffer; + hwaddr addr; + hwaddr len; + QLIST_ENTRY(BounceBuffer) next; +} BounceBuffer; + +static QemuMutex bounce_lock; +static GHashTable *bounce_buffers; + +static BounceBuffer *bounce_buffer_find_and_remove(void *buffer) +{ + BounceBuffer *bounce; + + qemu_mutex_lock(&bounce_lock); + bounce = g_hash_table_lookup(bounce_buffers, buffer); + if (bounce) { + g_hash_table_remove(bounce_buffers, buffer); + } + qemu_mutex_unlock(&bounce_lock); + return bounce; +} + +static inline void bounce_buffer_insert(BounceBuffer *bounce) +{ + qemu_mutex_lock(&bounce_lock); + g_hash_table_insert(bounce_buffers, bounce->buffer, bounce); + qemu_mutex_unlock(&bounce_lock); +} + +static guint bounce_buffer_hash(gconstpointer key) +{ + return *(long *)key >> 4; +} + #if !defined(CONFIG_USER_ONLY) typedef struct PhysPageEntry PhysPageEntry; @@ -433,6 +469,8 @@ void cpu_exec_init_all(void) memory_map_init(); io_mem_init(); #endif + qemu_mutex_init(&bounce_lock); + bounce_buffers = g_hash_table_new(bounce_buffer_hash, NULL); } #if !defined(CONFIG_USER_ONLY) @@ -2475,15 +2513,6 @@ void cpu_flush_icache_range(hwaddr start, int len) start, NULL, len, FLUSH_CACHE); } -typedef struct { - MemoryRegion *mr; - void *buffer; - hwaddr addr; - hwaddr len; -} BounceBuffer; - -static BounceBuffer bounce; - typedef struct MapClient { void *opaque; void (*callback)(void *opaque); @@ -2568,23 +2597,22 @@ void *address_space_map(AddressSpace *as, l = len; mr = address_space_translate(as, addr, &xlat, &l, is_write); if (!memory_access_is_direct(mr, is_write)) { - if (bounce.buffer) { - return NULL; - } + BounceBuffer *bounce = g_new(BounceBuffer, 1); /* Avoid unbounded allocations */ l = MIN(l, TARGET_PAGE_SIZE); - bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l); - bounce.addr = addr; - bounce.len = l; + bounce->buffer = qemu_memalign(TARGET_PAGE_SIZE, l); + bounce->addr = addr; + bounce->len = l; memory_region_ref(mr); - bounce.mr = mr; + bounce->mr = mr; if (!is_write) { - address_space_read(as, addr, bounce.buffer, l); + address_space_read(as, addr, bounce->buffer, l); } *plen = l; - return bounce.buffer; + bounce_buffer_insert(bounce); + return bounce->buffer; } base = xlat; @@ -2617,7 +2645,10 @@ void *address_space_map(AddressSpace *as, void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, int is_write, hwaddr access_len) { - if (buffer != bounce.buffer) { + BounceBuffer *bounce; + + bounce = bounce_buffer_find_and_remove(buffer); + if (!bounce) { MemoryRegion *mr; ram_addr_t addr1; @@ -2633,11 +2664,11 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, return; } if (is_write) { - address_space_write(as, bounce.addr, bounce.buffer, access_len); + address_space_write(as, bounce->addr, bounce->buffer, access_len); } - qemu_vfree(bounce.buffer); - bounce.buffer = NULL; - memory_region_unref(bounce.mr); + qemu_vfree(bounce->buffer); + memory_region_unref(bounce->mr); + g_free(bounce); cpu_notify_map_clients(); } diff --git a/tests/ide-test.c b/tests/ide-test.c index 29f4039..5eb81ec 100644 --- a/tests/ide-test.c +++ b/tests/ide-test.c @@ -366,6 +366,7 @@ static void test_bmdma_long_prdt(void) static void test_bmdma_no_busmaster(void) { uint8_t status; + uint8_t rs; /* No PRDT_EOT, each entry addr 0/size 64k, and in theory qemu shouldn't be * able to access it anyway because the Bus Master bit in the PCI command @@ -378,8 +379,10 @@ static void test_bmdma_no_busmaster(void) /* Not entirely clear what the expected result is, but this is what we get * in practice. At least we want to be aware of any changes. */ - g_assert_cmphex(status, ==, BM_STS_ACTIVE | BM_STS_INTR); - assert_bit_clear(inb(IDE_BASE + reg_status), DF | ERR); + g_assert_cmphex(status, ==, BM_STS_INTR); + rs = inb(IDE_BASE + reg_status); + assert_bit_clear(rs, DF); + assert_bit_set(rs, ERR); } static void test_bmdma_setup(void) -- 1.9.3