On Fri, Mar 25, 2022 at 06:40:13PM +0300, Andrey Ryabinin wrote: > The sequence of ram_block_add()/qemu_ram_free()/ram_block_add() > function calls leads to leaking some memory. > > ram_block_add() calls dirty_memory_extend() to allocate bitmap blocks > for new memory. These blocks only grow but never shrink. So the > qemu_ram_free() restores RAM size back to it's original stat but > doesn't touch dirty memory bitmaps. > > After qemu_ram_free() there is no way of knowing that we have > allocated dirty memory bitmaps beyond current RAM size. > So the next ram_block_add() will call dirty_memory_extend() again to > to allocate new bitmaps and rewrite pointers to bitmaps left after > the first ram_block_add()/dirty_memory_extend() calls. > > Rework dirty_memory_extend() to be able to shrink dirty maps, > also rename it to dirty_memory_resize(). And fix the leak by > shrinking dirty memory maps on qemu_ram_free() if needed. > > Fixes: 5b82b703b69a ("memory: RCU ram_list.dirty_memory[] for safe RAM > hotplug") > Cc: qemu-sta...@nongnu.org > Signed-off-by: Andrey Ryabinin <a...@yandex-team.com> > --- > include/exec/ramlist.h | 2 ++ > softmmu/physmem.c | 38 ++++++++++++++++++++++++++++++++------ > 2 files changed, 34 insertions(+), 6 deletions(-) > > diff --git a/include/exec/ramlist.h b/include/exec/ramlist.h > index 2ad2a81acc..019e238e7c 100644 > --- a/include/exec/ramlist.h > +++ b/include/exec/ramlist.h > @@ -41,6 +41,8 @@ typedef struct RAMBlockNotifier RAMBlockNotifier; > #define DIRTY_MEMORY_BLOCK_SIZE ((ram_addr_t)256 * 1024 * 8) > typedef struct { > struct rcu_head rcu; > + unsigned int nr_blocks;
Nit: How about renaming it to nr_blocks_allocated? It's much harder to identify this with the _inuse below otherwise.. It'll be great if there're comments explaining the two fields. > + unsigned int nr_blocks_inuse; If there'll be comment, we should definitely mark out that this variable is only set when a new array will be replacing this one. IOW, this field is not valid during most lifecycle of this structure, iiuc. And that's very not obvious.. > unsigned long *blocks[]; > } DirtyMemoryBlocks; > > diff --git a/softmmu/physmem.c b/softmmu/physmem.c > index 32f76362bf..073ab37351 100644 > --- a/softmmu/physmem.c > +++ b/softmmu/physmem.c > @@ -1919,8 +1919,23 @@ void qemu_ram_msync(RAMBlock *block, ram_addr_t start, > ram_addr_t length) > } > } > > +static void dirty_memory_free(DirtyMemoryBlocks *blocks) > +{ > + int i; > + > + /* > + *'nr_blocks_inuse' is more than nr_blocks (memory was extended) > + * or it's less than 'nr_blocks' (memory shrunk). In the second case > + * we free all the blocks above the nr_blocks_inuse. > + */ > + for (i = blocks->nr_blocks_inuse; i < blocks->nr_blocks; i++) { > + g_free(blocks->blocks[i]); > + } > + g_free(blocks); > +} > + > /* Called with ram_list.mutex held */ > -static void dirty_memory_extend(ram_addr_t old_ram_size, > +static void dirty_memory_resize(ram_addr_t old_ram_size, > ram_addr_t new_ram_size) > { > ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size, > @@ -1929,25 +1944,28 @@ static void dirty_memory_extend(ram_addr_t > old_ram_size, > DIRTY_MEMORY_BLOCK_SIZE); > int i; > > - /* Only need to extend if block count increased */ > - if (new_num_blocks <= old_num_blocks) { > + /* Only need to resize if block count changed */ > + if (new_num_blocks == old_num_blocks) { > return; > } > > for (i = 0; i < DIRTY_MEMORY_NUM; i++) { > DirtyMemoryBlocks *old_blocks; > DirtyMemoryBlocks *new_blocks; > + unsigned int num_blocks = MAX(old_num_blocks, new_num_blocks); > int j; > > old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]); > new_blocks = g_malloc(sizeof(*new_blocks) + > - sizeof(new_blocks->blocks[0]) * > new_num_blocks); > + sizeof(new_blocks->blocks[0]) * num_blocks); > + new_blocks->nr_blocks = new_num_blocks; Here new_num_blocks is passed to nr_blocks, however the allocation is with size max(old, new). Shouldn't it still be new_num_blocks? > > if (old_num_blocks) { > memcpy(new_blocks->blocks, old_blocks->blocks, > old_num_blocks * sizeof(old_blocks->blocks[0])); Here we copied over all old pointers even if old>new.. If we allocate the new array with new_num_blocks entries only, shouldn't we copy min(old, new) here instead? Thanks, > } > > + /* memory extend case (new>old): allocate new blocks*/ > for (j = old_num_blocks; j < new_num_blocks; j++) { > new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE); > } > @@ -1955,7 +1973,8 @@ static void dirty_memory_extend(ram_addr_t old_ram_size, > qatomic_rcu_set(&ram_list.dirty_memory[i], new_blocks); > > if (old_blocks) { > - g_free_rcu(old_blocks, rcu); > + old_blocks->nr_blocks_inuse = new_num_blocks; > + call_rcu(old_blocks, dirty_memory_free, rcu); > } > } > } > @@ -2001,7 +2020,7 @@ static void ram_block_add(RAMBlock *new_block, Error > **errp) > new_ram_size = MAX(old_ram_size, > (new_block->offset + new_block->max_length) >> > TARGET_PAGE_BITS); > if (new_ram_size > old_ram_size) { > - dirty_memory_extend(old_ram_size, new_ram_size); > + dirty_memory_resize(old_ram_size, new_ram_size); > } > /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ, > * QLIST (which has an RCU-friendly variant) does not have insertion at > @@ -2218,6 +2237,8 @@ static void reclaim_ramblock(RAMBlock *block) > > void qemu_ram_free(RAMBlock *block) > { > + ram_addr_t old_ram_size, new_ram_size; > + > if (!block) { > return; > } > @@ -2228,12 +2249,17 @@ void qemu_ram_free(RAMBlock *block) > } > > qemu_mutex_lock_ramlist(); > + old_ram_size = last_ram_page(); > + > QLIST_REMOVE_RCU(block, next); > ram_list.mru_block = NULL; > /* Write list before version */ > smp_wmb(); > ram_list.version++; > call_rcu(block, reclaim_ramblock, rcu); > + > + new_ram_size = last_ram_page(); > + dirty_memory_resize(old_ram_size, new_ram_size); > qemu_mutex_unlock_ramlist(); > } > > -- > 2.34.1 > -- Peter Xu