On 04/09/2013 21:41, Mike Day wrote:
> * now passes virt-test -t qemu
> * uses call_rcu instead of call_rcu1
> * completely removed the ram_list mutex and its locking functions
> * cleaned up some comments
>
> Changes from V3:
>
> * added an rcu reclaim function to free ram blocks
> * reduced the number of comments
> * made the rcu locking policy consistent for different caller scenarios
> * rcu updates to ram_block are now protected only by the iothread mutex
>
> Changes from V1:
>
> * Omitted locks or rcu critical sections within some functions that
> read or write the ram_list but are called in a protected context
> (the caller holds the iothread lock, the ram_list mutex, or an rcu
> critical section).
>
> Allow "unlocked" reads of the ram_list by using an RCU-enabled
> DQ. Most readers of the list no longer require holding the list mutex.
>
> The ram_list now uses a QLIST instead of a QTAILQ. The difference is
> minimal.
>
> This patch has been built and make-checked for the x86_64, ppc64,
> s390x, and arm targets. It has been tested under KVM with virt-test
> (-t qemu) and passes 15/15 migration tests.
>
> To apply this patch, you must base it on Paolo Bonzini's rcu tree and
> also apply the RCU DQ patch (below).
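The reader-side pattern the cover letter describes looks like this in
practice. A minimal sketch, not part of the patch itself; it mirrors
the new ram_bytes_total() below and assumes the rcu_read_lock()/
QLIST_FOREACH_RCU API from the rcu tree the series is based on:

    #include "qemu/rcu.h"        /* rcu_read_lock(), rcu_read_unlock() */
    #include "qemu/rcu_queue.h"  /* QLIST_FOREACH_RCU() */
    #include "exec/cpu-all.h"    /* RAMBlock, ram_list */

    static uint64_t total_ram_bytes(void)
    {
        RAMBlock *block;
        uint64_t total = 0;

        rcu_read_lock();            /* enter read-side critical section */
        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
            total += block->length; /* block cannot be reclaimed here */
        }
        rcu_read_unlock();          /* deferred frees may now run */
        return total;
    }

A reader that follows this pattern no longer needs the ram_list mutex;
writers still serialize against each other with the iothread lock.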
Looks good; just a couple of comments, but nothing I cannot apply in my
tree. I will push the new RCU tree tomorrow. As a next step, would you
look at adding memory barriers before all updates of ram_list.version?

Paolo

> https://github.com/bonzini/qemu/tree/rcu
> http://article.gmane.org/gmane.comp.emulators.qemu/230159/
>
> Signed-off-by: Mike Day <ncm...@ncultra.org>
> ---
> arch_init.c | 96 +++++++++++++++++-----------
> exec.c | 162
> +++++++++++++++++++++++++----------------------
> include/exec/cpu-all.h | 13 ++--
> include/qemu/rcu_queue.h | 8 +++
> 4 files changed, 160 insertions(+), 119 deletions(-)
>
> diff --git a/arch_init.c b/arch_init.c
> index 68a7ab7..76ef5c9 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -49,6 +49,7 @@
> #include "trace.h"
> #include "exec/cpu-all.h"
> #include "hw/acpi/acpi.h"
> +#include "qemu/rcu_queue.h"
>
> #ifdef DEBUG_ARCH_INIT
> #define DPRINTF(fmt, ...) \
> @@ -398,7 +399,8 @@ static void migration_bitmap_sync(void)
> trace_migration_bitmap_sync_start();
> address_space_sync_dirty_bitmap(&address_space_memory);
>
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + rcu_read_lock();
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
> if (memory_region_test_and_clear_dirty(block->mr,
> addr, TARGET_PAGE_SIZE,
> @@ -407,6 +409,8 @@ static void migration_bitmap_sync(void)
> }
> }
> }
> + rcu_read_unlock();
> +
> trace_migration_bitmap_sync_end(migration_dirty_pages
> - num_dirty_pages_init);
> num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
> @@ -446,6 +450,8 @@ static void migration_bitmap_sync(void)
> *
> * Returns: The number of bytes written.
> * 0 means no dirty pages
> + *
> + * Assumes that the caller has RCU-locked the ram_list.
> */
>
> static int ram_save_block(QEMUFile *f, bool last_stage)
> @@ -458,7 +464,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
> ram_addr_t current_addr;
>
> if (!block)
> - block = QTAILQ_FIRST(&ram_list.blocks);
> + block = QLIST_FIRST_RCU(&ram_list.blocks);
>
> while (true) {
> mr = block->mr;
> @@ -469,9 +475,9 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
> }
> if (offset >= block->length) {
> offset = 0;
> - block = QTAILQ_NEXT(block, next);
> + block = QLIST_NEXT_RCU(block, next);
> if (!block) {
> - block = QTAILQ_FIRST(&ram_list.blocks);
> + block = QLIST_FIRST_RCU(&ram_list.blocks);
> complete_round = true;
> ram_bulk_stage = false;
> }
> @@ -565,10 +571,10 @@ uint64_t ram_bytes_total(void)
> {
> RAMBlock *block;
> uint64_t total = 0;
> -
> - QTAILQ_FOREACH(block, &ram_list.blocks, next)
> + rcu_read_lock();
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
> total += block->length;
> -
> + rcu_read_unlock();
> return total;
> }
>
> @@ -602,10 +608,20 @@ static void reset_ram_globals(void)
> last_offset = 0;
> last_version = ram_list.version;
> ram_bulk_stage = true;
> + smp_wmb();

No need for this here; you need an smp_rmb() instead. You would need an
smp_wmb() before all increments of ram_list.version.

See docs/atomic.txt: read/write barriers come in pairs, and the paired
accesses happen in the opposite order. Here we read version before
block, with an smp_rmb() in the middle; elsewhere we write version
after block, with an smp_wmb() in the middle.

I'll remove this smp_wmb() when I commit.
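Concretely, the pairing works like this (an illustrative sketch per
docs/atomic.txt, not code from the patch):

    /* writer, under the iothread lock, e.g. after inserting a block */
    QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
    smp_wmb();          /* order the list write before the version write */
    ram_list.version++;

    /* reader, e.g. in reset_ram_globals() */
    last_version = ram_list.version;
    smp_rmb();          /* order the version read before the list reads */
    /* ... walk ram_list.blocks; if last_version was current, the walk
     * sees every block published before that version bump ... */

If the reader observes the incremented version, the barrier pair
guarantees it also observes the list update that preceded it.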
> }
>
> #define MAX_WAIT 50 /* ms, half buffered_file limit */
> +
> +/* ram_save_* functions each implement a long-running RCU critical
> + * section. When RCU reclaims in the code become numerous, it will be
> + * necessary to reduce the granularity of these critical sections
> + * (ram_save_setup, ram_save_iterate, and ram_save_complete).
> + */
> +
> static int ram_save_setup(QEMUFile *f, void *opaque)
> {
> RAMBlock *block;
> @@ -631,7 +647,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
> }
>
> qemu_mutex_lock_iothread();
> - qemu_mutex_lock_ramlist();
> + rcu_read_lock();
> bytes_transferred = 0;
> reset_ram_globals();
>
> @@ -641,13 +657,13 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>
> qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
>
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> qemu_put_byte(f, strlen(block->idstr));
> qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
> qemu_put_be64(f, block->length);
> }
>
> - qemu_mutex_unlock_ramlist();
> + rcu_read_unlock();
>
> ram_control_before_iterate(f, RAM_CONTROL_SETUP);
> ram_control_after_iterate(f, RAM_CONTROL_SETUP);
> @@ -664,16 +680,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
> int64_t t0;
> int total_sent = 0;
>
> - qemu_mutex_lock_ramlist();
> -
> - if (ram_list.version != last_version) {
> + if (atomic_rcu_read(&ram_list.version) != last_version) {
> reset_ram_globals();
> }
>
> ram_control_before_iterate(f, RAM_CONTROL_ROUND);
> -
> t0 = qemu_get_clock_ns(rt_clock);
> i = 0;
> + rcu_read_lock();
> while ((ret = qemu_file_rate_limit(f)) == 0) {
> int bytes_sent;
>
> @@ -700,9 +714,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
> }
> i++;
> }
> -
> - qemu_mutex_unlock_ramlist();
> -
> + rcu_read_unlock();
> /*
> * Must occur before EOS (or any QEMUFile operation)
> * because of RDMA protocol.
> @@ -723,13 +735,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>
> static int ram_save_complete(QEMUFile *f, void *opaque)
> {
> - qemu_mutex_lock_ramlist();
> +
> migration_bitmap_sync();
>
> ram_control_before_iterate(f, RAM_CONTROL_FINISH);
>
> /* try transferring iterative blocks of memory */
>
> + rcu_read_lock();
> /* flush all remaining blocks regardless of rate limiting */
> while (true) {
> int bytes_sent;
> @@ -741,11 +754,10 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
> }
> bytes_transferred += bytes_sent;
> }
> -
> + rcu_read_unlock();
> ram_control_after_iterate(f, RAM_CONTROL_FINISH);
> migration_end();
>
> - qemu_mutex_unlock_ramlist();
> qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>
> return 0;
> @@ -807,6 +819,9 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void
> *host)
> return rc;
> }
>
> +/* Must be called from within an RCU critical section.
> + * Returns a pointer from within the RCU-protected ram_list.
> + */
> static inline void *host_from_stream_offset(QEMUFile *f,
> ram_addr_t offset,
> int flags)
> @@ -828,9 +843,10 @@ static inline void *host_from_stream_offset(QEMUFile *f,
> qemu_get_buffer(f, (uint8_t *)id, len);
> id[len] = 0;
>
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> - if (!strncmp(id, block->idstr, sizeof(id)))
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> + if (!strncmp(id, block->idstr, sizeof(id))) {
> return memory_region_get_ram_ptr(block->mr) + offset;
> + }
> }
>
> fprintf(stderr, "Can't find block %s!\n", id);
> @@ -867,7 +883,12 @@ static int ram_load(QEMUFile *f, void *opaque, int
> version_id)
> if (version_id < 4 || version_id > 4) {
> return -EINVAL;
> }
> -
> + /* This implements a long-running RCU critical section.
> + * When RCU reclaims in the code become numerous, it will be
> + * necessary to reduce the granularity of this critical section.
> + */
> + rcu_read_lock();
> do {
> addr = qemu_get_be64(f);
>
> @@ -889,21 +910,19 @@ static int ram_load(QEMUFile *f, void *opaque, int
> version_id)
> qemu_get_buffer(f, (uint8_t *)id, len);
> id[len] = 0;
> length = qemu_get_be64(f);
> -
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> if (!strncmp(id, block->idstr, sizeof(id))) {
> if (block->length != length) {
> fprintf(stderr,
> "Length mismatch: %s: " RAM_ADDR_FMT
> " in != " RAM_ADDR_FMT "\n", id,
> length,
> block->length);
> - ret = -EINVAL;
> + ret = -EINVAL;
> goto done;
> }
> break;
> }
> }
> -
> if (!block) {
> fprintf(stderr, "Unknown ramblock \"%s\", cannot "
> "accept migration\n", id);
> @@ -916,30 +935,30 @@ static int ram_load(QEMUFile *f, void *opaque, int
> version_id)
> }
> }
>
> + /* Call host_from_stream_offset while holding an RCU read lock.
> + * It returns a pointer from within the RCU-protected ram_list.
> + */
> if (flags & RAM_SAVE_FLAG_COMPRESS) {
> - void *host;
> uint8_t ch;
> -
> - host = host_from_stream_offset(f, addr, flags);
> + void *host = host_from_stream_offset(f, addr, flags);
> if (!host) {
> - return -EINVAL;
> + ret = -EINVAL;
> + goto done;
> }
> -
> ch = qemu_get_byte(f);
> ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
> } else if (flags & RAM_SAVE_FLAG_PAGE) {
> - void *host;
> -
> - host = host_from_stream_offset(f, addr, flags);
> + void *host = host_from_stream_offset(f, addr, flags);
> if (!host) {
> - return -EINVAL;
> + ret = -EINVAL;
> + goto done;
> }
> -
> qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
> } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
> void *host = host_from_stream_offset(f, addr, flags);
> if (!host) {
> - return -EINVAL;
> + ret = -EINVAL;
> + goto done;
> }
>
> if (load_xbzrle(f, addr, host) < 0) {
> @@ -957,6 +976,7 @@ static int ram_load(QEMUFile *f, void *opaque, int
> version_id)
> } while (!(flags & RAM_SAVE_FLAG_EOS));
>
> done:
> + rcu_read_unlock();
> DPRINTF("Completed load of VM with exit code %d seq iteration "
> "%" PRIu64 "\n", ret, seq_iter);
> return ret;
> diff --git a/exec.c b/exec.c
> index 5eebcc1..4b7605a 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -46,7 +46,7 @@
> #endif
> #include "exec/cpu-all.h"
> #include "qemu/tls.h"
> -
> +#include "qemu/rcu_queue.h"
> #include "exec/cputlb.h"
> #include "translate-all.h"
>
> @@ -57,7 +57,10 @@
> #if !defined(CONFIG_USER_ONLY)
> static int in_migration;
>
> -RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
> +/* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
> + * are protected by a lock, currently the iothread lock.
> + */
> +RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
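To illustrate that write-side rule, this is roughly how
qemu_ram_alloc_from_ptr() below keeps the list sorted from biggest to
smallest using only RCU-safe insertions (a sketch; the smp_wmb() is
the barrier requested above and is not in this version of the patch):

    RAMBlock *block, *last_block = NULL;

    /* caller holds the iothread lock; readers may be walking the list */
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last_block = block;
        if (block->length < new_block->length) {
            break;          /* insert before the first smaller block */
        }
    }
    if (block) {
        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
    } else if (last_block) {
        /* QLIST has no INSERT_TAIL, hence the last_block cursor */
        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
    } else {
        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
    }
    smp_wmb();              /* publish the links, then the version */
    ram_list.version++;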
>
> static MemoryRegion *system_memory;
> static MemoryRegion *system_io;
> @@ -329,7 +332,6 @@ void cpu_exec_init_all(void)
> {
> tls_alloc_current_cpu_var();
> #if !defined(CONFIG_USER_ONLY)
> - qemu_mutex_init(&ram_list.mutex);
> memory_map_init();
> io_mem_init();
> #endif
> @@ -901,16 +903,6 @@ void qemu_flush_coalesced_mmio_buffer(void)
> kvm_flush_coalesced_mmio_buffer();
> }
>
> -void qemu_mutex_lock_ramlist(void)
> -{
> - qemu_mutex_lock(&ram_list.mutex);
> -}
> -
> -void qemu_mutex_unlock_ramlist(void)
> -{
> - qemu_mutex_unlock(&ram_list.mutex);
> -}
> -
> #if defined(__linux__) && !defined(TARGET_S390X)
>
> #include <sys/vfs.h>
> @@ -1021,17 +1013,24 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
> RAMBlock *block, *next_block;
> ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
>
> + /* ram_list must be protected by a mutex (for writes), or
> + * an rcu critical section (for reads). Currently this code
> + * is called with the iothread lock held. If that changes,
> + * make sure to protect ram_list with an rcu critical section.
> + */
> +
> assert(size != 0); /* it would hand out same offset multiple times */
>
> - if (QTAILQ_EMPTY(&ram_list.blocks))
> return 0;
> + if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
> return 0;
> + }
>
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> ram_addr_t end, next = RAM_ADDR_MAX;
>
> end = block->offset + block->length;
>
> - QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
> if (next_block->offset >= end) {
> next = MIN(next, next_block->offset);
> }
> @@ -1056,9 +1055,11 @@ ram_addr_t last_ram_offset(void)
> RAMBlock *block;
> ram_addr_t last = 0;
>
> - QTAILQ_FOREACH(block, &ram_list.blocks, next)
> + rcu_read_lock();
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> last = MAX(last, block->offset + block->length);
> -
> + }
> + rcu_read_unlock();
> return last;
> }
>
> @@ -1083,7 +1084,12 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char
> *name, DeviceState *dev)
> RAMBlock *new_block, *block;
>
> new_block = NULL;
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> +
> + /* Assumes that the iothread lock is taken ... if that changes,
> + * add an rcu_read_lock()/unlock pair when traversing the
> + * ram list
> + */
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> if (block->offset == addr) {
> new_block = block;
> break;
> @@ -1102,15 +1108,13 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char
> *name, DeviceState *dev)
> pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
>
> /* This assumes the iothread lock is taken here too. */
> - qemu_mutex_lock_ramlist();
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
> fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
> new_block->idstr);
> abort();
> }
> }
> - qemu_mutex_unlock_ramlist();
> }
>
> static int memory_try_enable_merging(void *addr, size_t len)
> @@ -1123,16 +1127,16 @@ static int memory_try_enable_merging(void *addr,
> size_t len)
> return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
> }
>
> +/* Called with the iothread lock held */
> +
> ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
> MemoryRegion *mr)
> {
> - RAMBlock *block, *new_block;
> + RAMBlock *block, *new_block, *last_block = 0;
>
> size = TARGET_PAGE_ALIGN(size);
> new_block = g_malloc0(sizeof(*new_block));
>
> - /* This assumes the iothread lock is taken here too. */
> - qemu_mutex_lock_ramlist();
> new_block->mr = mr;
> new_block->offset = find_ram_offset(size);
> if (host) {
> @@ -1165,21 +1169,24 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size,
> void *host,
> new_block->length = size;
>
> /* Keep the list sorted from biggest to smallest block. */
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> + last_block = block;
> if (block->length < new_block->length) {
> break;
> }
> }
> if (block) {
> - QTAILQ_INSERT_BEFORE(block, new_block, next);
> + QLIST_INSERT_BEFORE_RCU(block, new_block, next);
> } else {
> - QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
> + if (last_block) {
> + QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
> + } else { /* list is empty */
> + QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
> + }
> }
> ram_list.mru_block = NULL;
>
> ram_list.version++;
> - qemu_mutex_unlock_ramlist();
> -
> ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
> last_ram_offset() >>
> TARGET_PAGE_BITS);
> memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
> @@ -1204,31 +1211,29 @@ void qemu_ram_free_from_ptr(ram_addr_t addr)
> {
> RAMBlock *block;
>
> - /* This assumes the iothread lock is taken here too. */
> - qemu_mutex_lock_ramlist();
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + /* This assumes the iothread lock is taken here too. */
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> if (addr == block->offset) {
> - QTAILQ_REMOVE(&ram_list.blocks, block, next);
> + QLIST_REMOVE_RCU(block, next);
> ram_list.mru_block = NULL;
> ram_list.version++;
> - g_free(block);
> + call_rcu(block, (void (*)(struct RAMBlock *))g_free, rcu);

Hmm, I'll probably reintroduce reclaim_ram_block, while keeping
call_rcu's well-typedness.
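Something along these lines; the reclaim body just mirrors the g_free()
in the patch, and only the function's name comes from the comment above:

    static void reclaim_ram_block(RAMBlock *block)
    {
        g_free(block);
    }

    /* ... then at the call sites, no function-pointer cast needed: */
    call_rcu(block, reclaim_ram_block, rcu);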
> break;
> }
> }
> - qemu_mutex_unlock_ramlist();
> }
>
> +/* Called with the iothread lock held */
> void qemu_ram_free(ram_addr_t addr)
> {
> RAMBlock *block;
>
> - /* This assumes the iothread lock is taken here too. */
> - qemu_mutex_lock_ramlist();
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> if (addr == block->offset) {
> - QTAILQ_REMOVE(&ram_list.blocks, block, next);
> + QLIST_REMOVE_RCU(block, next);
> ram_list.mru_block = NULL;
> - ram_list.version++;
> + int ram_list_version = atomic_rcu_read(&ram_list.version);
> + atomic_rcu_set(&ram_list.version, ++ram_list_version);
> if (block->flags & RAM_PREALLOC_MASK) {
> ;
> } else if (mem_path) {
> @@ -1249,12 +1254,10 @@ void qemu_ram_free(ram_addr_t addr)
> qemu_anon_ram_free(block->host, block->length);
> }
> }
> - g_free(block);
> + call_rcu(block, (void (*)(struct RAMBlock *))g_free, rcu);
> break;
> }
> }
> - qemu_mutex_unlock_ramlist();
> -
> }
>
> #ifndef _WIN32
> @@ -1265,7 +1268,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
> int flags;
> void *area, *vaddr;
>
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> offset = addr - block->offset;
> if (offset < block->length) {
> vaddr = block->host + offset;
> @@ -1313,7 +1316,6 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
> memory_try_enable_merging(vaddr, length);
> qemu_ram_setup_dump(vaddr, length);
> }
> - return;
> }
> }
> }
> @@ -1323,23 +1325,21 @@ static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
> {
> RAMBlock *block;
>
> - /* The list is protected by the iothread lock here. */
> + /* This assumes the iothread lock is taken here too. */
> +
> block = ram_list.mru_block;
> if (block && addr - block->offset < block->length) {
> - goto found;
> + return block;
> }
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> if (addr - block->offset < block->length) {
> - goto found;
> + atomic_rcu_set(&ram_list.mru_block, block);

I think this is not needed; block was already published into the block
list. What is important is to order the mru_block = NULL store so that
it happens before the node is removed. We don't do that today, but we
can fix it later.
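That is, the removal path should eventually look something like this
(illustrative, not in this patch):

    ram_list.mru_block = NULL;      /* invalidate the cache first... */
    smp_wmb();                      /* ...make that store visible... */
    QLIST_REMOVE_RCU(block, next);  /* ...then unlink the node */

The rcu_head in RAMBlock then keeps the block alive until every reader
that might still hold either pointer has left its critical section.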
> + return block;
> }
> }
>
> fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
> abort();
> -
> -found:
> - ram_list.mru_block = block;
> - return block;
> }
>
> /* Return a host pointer to ram allocated with qemu_ram_alloc.
> @@ -1378,8 +1378,8 @@ static void *qemu_safe_ram_ptr(ram_addr_t addr)
> {
> RAMBlock *block;
>
> - /* The list is protected by the iothread lock here. */
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + /* This assumes the iothread lock is taken here too. */
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> if (addr - block->offset < block->length) {
> if (xen_enabled()) {
> /* We need to check if the requested address is in the RAM
> @@ -1399,14 +1399,19 @@ static void *qemu_safe_ram_ptr(ram_addr_t addr)
>
> fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
> abort();
> -
> return NULL;
> }
>
> /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
> - * but takes a size argument */
> + * but takes a size argument
> + *
> + * The returned pointer is not protected by RCU so the caller
> + * must have other means of protecting the pointer, such as a
> + * reference.
> + */
> static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
> {
> + void *ptr = NULL;
> if (*size == 0) {
> return NULL;
> }
> @@ -1414,12 +1419,14 @@ static void *qemu_ram_ptr_length(ram_addr_t addr,
> hwaddr *size)
> return xen_map_cache(addr, *size, 1);
> } else {
> RAMBlock *block;
> -
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + rcu_read_lock();
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> if (addr - block->offset < block->length) {
> if (addr - block->offset + *size > block->length)
> *size = block->length - addr + block->offset;
> - return block->host + (addr - block->offset);
> + ptr = block->host + (addr - block->offset);
> + rcu_read_unlock();
> + return ptr;
> }
> }
>
> @@ -1429,37 +1436,43 @@ static void *qemu_ram_ptr_length(ram_addr_t addr,
> hwaddr *size)
> }
>
> /* Some of the softmmu routines need to translate from a host pointer
> - (typically a TLB entry) back to a ram offset. */
> + * (typically a TLB entry) back to a ram offset.
> + *
> + * Note that the returned MemoryRegion is not RCU-protected.
> + */
> MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
> {
> RAMBlock *block;
> uint8_t *host = ptr;
> + MemoryRegion *mr = NULL;
>
> if (xen_enabled()) {
> *ram_addr = xen_ram_addr_from_mapcache(ptr);
> return qemu_get_ram_block(*ram_addr)->mr;
> }
> -
> - block = ram_list.mru_block;
> + rcu_read_lock();
> + block = atomic_rcu_read(&ram_list.mru_block);

Good catch!

Paolo

> if (block && block->host && host - block->host < block->length) {
> - goto found;
> + *ram_addr = block->offset + (host - block->host);
> + mr = block->mr;
> + goto unlock_out;
> }
>
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> /* This case append when the block is not mapped. */
> if (block->host == NULL) {
> continue;
> }
> if (host - block->host < block->length) {
> - goto found;
> + *ram_addr = block->offset + (host - block->host);
> + mr = block->mr;
> + goto unlock_out;
> }
> }
>
> - return NULL;
> -
> -found:
> - *ram_addr = block->offset + (host - block->host);
> - return block->mr;
> +unlock_out:
> + rcu_read_unlock();
> + return mr;
> }
>
> static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
> @@ -2709,9 +2722,10 @@ bool cpu_physical_memory_is_io(hwaddr phys_addr)
> void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
> {
> RAMBlock *block;
> -
> - QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> + rcu_read_lock();
> + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> func(block->host, block->offset, block->length, opaque);
> }
> + rcu_read_unlock();
> }
> #endif
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index e088089..e68b86b 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -24,6 +24,7 @@
> #include "qemu/thread.h"
> #include "qom/cpu.h"
> #include "qemu/tls.h"
> +#include "qemu/rcu.h"
>
> /* some important defines:
> *
> @@ -448,28 +449,26 @@ extern ram_addr_t ram_size;
> #define RAM_PREALLOC_MASK (1 << 0)
>
> typedef struct RAMBlock {
> + struct rcu_head rcu;
> struct MemoryRegion *mr;
> uint8_t *host;
> ram_addr_t offset;
> ram_addr_t length;
> uint32_t flags;
> char idstr[256];
> - /* Reads can take either the iothread or the ramlist lock.
> - * Writes must take both locks.
> - */
> - QTAILQ_ENTRY(RAMBlock) next;
> + /* Writes must hold the iothread lock */
> + QLIST_ENTRY(RAMBlock) next;
> #if defined(__linux__) && !defined(TARGET_S390X)
> int fd;
> #endif
> } RAMBlock;
>
> typedef struct RAMList {
> - QemuMutex mutex;
> /* Protected by the iothread lock. */
> uint8_t *phys_dirty;
> RAMBlock *mru_block;
> - /* Protected by the ramlist lock. */
> - QTAILQ_HEAD(, RAMBlock) blocks;
> + /* RCU-enabled, writes protected by the iothread lock. */
> + QLIST_HEAD(, RAMBlock) blocks;
> uint32_t version;
> } RAMList;
> extern RAMList ram_list;
> diff --git a/include/qemu/rcu_queue.h b/include/qemu/rcu_queue.h
> index e2b8ba5..d159850 100644
> --- a/include/qemu/rcu_queue.h
> +++ b/include/qemu/rcu_queue.h
> @@ -37,6 +37,14 @@
> extern "C" {
> #endif
>
> +
> +/*
> + * List access methods.
> + */
> +#define QLIST_EMPTY_RCU(head) (atomic_rcu_read(&(head)->lh_first) == NULL)
> +#define QLIST_FIRST_RCU(head) (atomic_rcu_read(&(head)->lh_first))
> +#define QLIST_NEXT_RCU(elm, field) (atomic_rcu_read(&(elm)->field.le_next))
> +
> /*
> * List functions.
> */
>
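The new accessors pair with the existing rcu_read_lock()/unlock() API.
For example, the resumable cursor in ram_save_block() above cannot use
QLIST_FOREACH_RCU because it keeps its position across calls, so it
walks the list by hand (a sketch abridged from the patch):

    rcu_read_lock();
    if (!block) {
        block = QLIST_FIRST_RCU(&ram_list.blocks);  /* dependency-ordered */
    }
    /* ... save pages from this block, then advance: */
    block = QLIST_NEXT_RCU(block, next);            /* NULL at end of list */
    if (!block) {
        block = QLIST_FIRST_RCU(&ram_list.blocks);  /* wrap to the start */
    }
    rcu_read_unlock();

The atomic_rcu_read() inside these macros provides the dependency
ordering that makes it safe to dereference the loaded pointer within
the critical section.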