Incomplete, because the users of qemu_get_ram_ptr should be wrapped with rcu_read_lock/rcu_read_unlock. It happens to work because those are nops anyway. :)
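For reference, the read side that is still missing would look like this at each call site (just a sketch, not part of the patch; do_something_with() stands for whatever the caller does with the pointer):

    void *p;

    rcu_read_lock();
    p = qemu_get_ram_ptr(addr);
    do_something_with(p);   /* block cannot be reclaimed while we hold
                               the read lock */
    rcu_read_unlock();      /* from here on, call_rcu may free the block */

Since our rcu_read_lock() nests, the one taken inside qemu_get_ram_ptr itself remains fine.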
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
---
 arch_init.c |   14 +++++++
 cpu-all.h   |    4 ++
 exec.c      |  124 ++++++++++++++++++++++++++++++++++++++++------------------
 3 files changed, 103 insertions(+), 39 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index 484b39d..f5a567b 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -117,6 +117,7 @@ static int ram_save_block(QEMUFile *f)
     ram_addr_t current_addr;
     int bytes_sent = 0;
 
+    rcu_read_lock();
     if (!block)
         block = QLIST_FIRST(&ram_list.blocks);
 
@@ -167,6 +168,7 @@ static int ram_save_block(QEMUFile *f)
         current_addr = block->offset + offset;
     } while (current_addr != last_block->offset + last_offset);
 
+    rcu_read_unlock();
     last_block = block;
     last_offset = offset;
 
@@ -181,6 +183,7 @@ static ram_addr_t ram_save_remaining(void)
     RAMBlock *block;
     ram_addr_t count = 0;
 
+    rcu_read_lock();
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         ram_addr_t addr;
         for (addr = block->offset; addr < block->offset + block->length;
@@ -190,6 +193,7 @@ static ram_addr_t ram_save_remaining(void)
             }
         }
     }
+    rcu_read_unlock();
 
     return count;
 }
@@ -209,8 +213,10 @@ uint64_t ram_bytes_total(void)
     RAMBlock *block;
     uint64_t total = 0;
 
+    rcu_read_lock();
    QLIST_FOREACH(block, &ram_list.blocks, next)
         total += block->length;
+    rcu_read_unlock();
 
     return total;
 }
@@ -232,6 +238,7 @@ static void sort_ram_list(void)
     RAMBlock *block, *nblock, **blocks;
     int n;
     n = 0;
+    //qemu_mutex_lock(&ram_list.mutex);
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         ++n;
     }
@@ -245,6 +252,7 @@ static void sort_ram_list(void)
     while (--n >= 0) {
         QLIST_INSERT_HEAD(&ram_list.blocks, blocks[n], next);
     }
+    //qemu_mutex_unlock(&ram_list.mutex);
     qemu_free(blocks);
 }
 
@@ -273,6 +281,7 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
         sort_ram_list();
 
         /* Make sure all dirty bits are set */
+        rcu_read_lock();
         QLIST_FOREACH(block, &ram_list.blocks, next) {
             for (addr = block->offset; addr < block->offset + block->length;
                  addr += TARGET_PAGE_SIZE) {
@@ -282,17 +291,20 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
                 }
             }
         }
+        rcu_read_unlock();
 
         /* Enable dirty memory tracking */
         cpu_physical_memory_set_dirty_tracking(1);
 
         qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
 
+        rcu_read_lock();
         QLIST_FOREACH(block, &ram_list.blocks, next) {
             qemu_put_byte(f, strlen(block->idstr));
             qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
             qemu_put_be64(f, block->length);
         }
+        rcu_read_unlock();
     }
 
     bytes_transferred_last = bytes_transferred;
@@ -374,6 +386,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
         return -EINVAL;
     }
 
+    rcu_read_lock();
     do {
         addr = qemu_get_be64(f);
 
@@ -453,6 +466,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
             return -EIO;
         }
     } while (!(flags & RAM_SAVE_FLAG_EOS));
+    rcu_read_unlock();
 
     return 0;
 }
diff --git a/cpu-all.h b/cpu-all.h
index 083d9e6..7ed3f75 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -21,6 +21,7 @@
 
 #include "qemu-common.h"
 #include "cpu-common.h"
+#include "rcu.h"
 
 /* some important defines:
  *
@@ -475,11 +476,14 @@ extern ram_addr_t ram_size;
 #define RAM_PREALLOC_MASK   (1 << 0)
 
 typedef struct RAMBlock {
+    struct rcu_head h;
     uint8_t *host;
     ram_addr_t offset;
     ram_addr_t length;
     uint32_t flags;
+    // should be protected by its own lock + RCU on the read side
     QLIST_ENTRY(RAMBlock) next;
+    // protected by the iothread lock + RCU on the read side
     QLIST_ENTRY(RAMBlock) next_mru;
     char idstr[256];
 #if defined(__linux__) && !defined(TARGET_S390X)
diff --git a/exec.c b/exec.c
index d25e3cc..6a7cec7 100644
--- a/exec.c
+++ b/exec.c
@@ -2935,6 +2935,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
     }
     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
 
+    //qemu_mutex_lock(&ram_list.mutex);
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (!strcmp(block->idstr, new_block->idstr)) {
             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
@@ -2986,6 +2987,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
     new_block->length = size;
 
     QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
+    //qemu_mutex_unlock(&ram_list.mutex);
     QLIST_INSERT_HEAD(&ram_list.blocks_mru, new_block, next_mru);
 
     ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
@@ -3008,52 +3010,62 @@ void qemu_ram_free_from_ptr(ram_addr_t addr)
 {
     RAMBlock *block;
 
+    //qemu_mutex_lock(&ram_list.mutex);
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
             QLIST_REMOVE(block, next);
             QLIST_REMOVE(block, next_mru);
-            qemu_free(block);
+            call_rcu(&block->h, rcu_free);
             return;
         }
     }
+    //qemu_mutex_unlock(&ram_list.mutex);
+}
+
+static void rcu_free_block(struct rcu_head *h)
+{
+    RAMBlock *block = container_of(h, RAMBlock, h);
+    if (block->flags & RAM_PREALLOC_MASK) {
+        ;
+    } else if (mem_path) {
+#if defined (__linux__) && !defined(TARGET_S390X)
+        if (block->fd) {
+            munmap(block->host, block->length);
+            close(block->fd);
+        } else {
+            qemu_vfree(block->host);
+        }
+#else
+        abort();
+#endif
+    } else {
+#if defined(TARGET_S390X) && defined(CONFIG_KVM)
+        munmap(block->host, block->length);
+#else
+        if (xen_enabled()) {
+            xen_invalidate_map_cache_entry(block->host);
+        } else {
+            qemu_vfree(block->host);
+        }
+#endif
+    }
+    qemu_free(block);
 }
 
 void qemu_ram_free(ram_addr_t addr)
 {
     RAMBlock *block;
 
+    //qemu_mutex_lock(&ram_list.mutex);
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
             QLIST_REMOVE(block, next);
             QLIST_REMOVE(block, next_mru);
-            if (block->flags & RAM_PREALLOC_MASK) {
-                ;
-            } else if (mem_path) {
-#if defined (__linux__) && !defined(TARGET_S390X)
-                if (block->fd) {
-                    munmap(block->host, block->length);
-                    close(block->fd);
-                } else {
-                    qemu_vfree(block->host);
-                }
-#else
-                abort();
-#endif
-            } else {
-#if defined(TARGET_S390X) && defined(CONFIG_KVM)
-                munmap(block->host, block->length);
-#else
-                if (xen_enabled()) {
-                    xen_invalidate_map_cache_entry(block->host);
-                } else {
-                    qemu_vfree(block->host);
-                }
-#endif
-            }
-            qemu_free(block);
+            call_rcu(&block->h, rcu_free_block);
             return;
         }
     }
+    //qemu_mutex_unlock(&ram_list.mutex);
 }
 
@@ -3065,6 +3077,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
     int flags;
     void *area, *vaddr;
 
+    rcu_read_lock();
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         offset = addr - block->offset;
         if (offset < block->length) {
@@ -3072,8 +3085,9 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
             if (block->flags & RAM_PREALLOC_MASK) {
                 ;
             } else {
+                /* No need to munmap the memory, mmap will discard the old mapping
+                 * atomically.  */
                 flags = MAP_FIXED;
-                munmap(vaddr, length);
                 if (mem_path) {
 #if defined(__linux__) && !defined(TARGET_S390X)
                     if (block->fd) {
@@ -3112,9 +3126,10 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
                 }
                 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
             }
-            return;
+            break;
         }
     }
+    rcu_read_unlock();
 }
 #endif /* !_WIN32 */
@@ -3128,8 +3143,16 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
  */
 void *qemu_get_ram_ptr(ram_addr_t addr)
 {
+    uint8_t *p = NULL;
     RAMBlock *block;
 
+    /* RCU protects the "existence" of the blocks, the iothread lock
+     * protects the next_mru chain.  This rcu_read_lock() is most
+     * likely nested, since the caller probably wants to do something
+     * with the result as well!  FIXME: this is not done anywhere yet.
+     * Due to our RCU implementation we can avoid that, but it's not
+     * clean.  */
+    rcu_read_lock();
     QLIST_FOREACH(block, &ram_list.blocks_mru, next_mru) {
         if (addr - block->offset < block->length) {
             /* Move this entry to to start of the list.  */
@@ -3143,20 +3166,24 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
                  * In that case just map until the end of the page.
                  */
                 if (block->offset == 0) {
-                    return xen_map_cache(addr, 0, 0);
+                    p = xen_map_cache(addr, 0, 0);
+                    break;
                 } else if (block->host == NULL) {
                     block->host =
                         xen_map_cache(block->offset, block->length, 1);
                 }
             }
-            return block->host + (addr - block->offset);
+            p = block->host + (addr - block->offset);
+            break;
         }
     }
+    rcu_read_unlock();
 
-    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
-    abort();
-
-    return NULL;
+    if (!p) {
+        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
+        abort();
+    }
+    return p;
 }
 
 /* Return a host pointer to ram allocated with qemu_ram_alloc.
@@ -3164,8 +3191,10 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
  */
 void *qemu_safe_ram_ptr(ram_addr_t addr)
 {
+    uint8_t *p = NULL;
     RAMBlock *block;
 
+    rcu_read_lock();
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr - block->offset < block->length) {
             if (xen_enabled()) {
@@ -3174,20 +3203,25 @@ void *qemu_safe_ram_ptr(ram_addr_t addr)
                  * In that case just map until the end of the page.
                  */
                 if (block->offset == 0) {
-                    return xen_map_cache(addr, 0, 0);
+                    p = xen_map_cache(addr, 0, 0);
+                    break;
                 } else if (block->host == NULL) {
                     block->host =
                         xen_map_cache(block->offset, block->length, 1);
                 }
             }
-            return block->host + (addr - block->offset);
+            p = block->host + (addr - block->offset);
+            break;
         }
     }
+    rcu_read_unlock();
 
-    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
-    abort();
+    if (!p) {
+        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
+        abort();
+    }
 
-    return NULL;
+    return p;
 }
 
 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
@@ -3202,13 +3236,16 @@ void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
     } else {
         RAMBlock *block;
 
+        rcu_read_lock();
         QLIST_FOREACH(block, &ram_list.blocks, next) {
             if (addr - block->offset < block->length) {
                 if (addr - block->offset + *size > block->length)
                     *size = block->length - addr + block->offset;
+                rcu_read_unlock();
                 return block->host + (addr - block->offset);
             }
         }
+        rcu_read_unlock();
 
         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
         abort();
@@ -3230,6 +3267,13 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
         return 0;
     }
 
+    /* RCU protects the "existence" of the blocks, the iothread lock
+     * protects the next_mru chain.  This rcu_read_lock() is most
+     * likely nested, since the caller probably wants to do something
+     * with the result as well!  FIXME: this is not done anywhere yet.
+     * Due to our RCU implementation we can avoid that, but it's not
+     * clean.  */
+    rcu_read_lock();
     QLIST_FOREACH(block, &ram_list.blocks_mru, next_mru) {
         /* This case append when the block is not mapped.  */
         if (block->host == NULL) {
@@ -3237,9 +3281,11 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
         if (host - block->host < block->length) {
             *ram_addr = block->offset + (host - block->host);
+            rcu_read_unlock();
             return 0;
         }
     }
+    rcu_read_unlock();
 
     return -1;
 }
-- 
1.7.6
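P.S.: to spell out the reclamation protocol the patch relies on (a sketch only; the "reader" stands for any of the loops converted above):

    /* Reader */
    rcu_read_lock();
    QLIST_FOREACH(block, &ram_list.blocks, next) {
        /* block and block->host remain valid here, even if the
         * updater unlinks the block concurrently */
    }
    rcu_read_unlock();

    /* Updater */
    QLIST_REMOVE(block, next);
    QLIST_REMOVE(block, next_mru);
    call_rcu(&block->h, rcu_free_block);   /* reclaims only after a grace
                                              period, i.e. once all readers
                                              that could still see the block
                                              have left their read-side
                                              critical sections */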