On Sun, Jun 27, 2021 at 01:38:16PM +0800, huang...@chinatelecom.cn wrote:
> From: Hyman Huang(黄勇) <huang...@chinatelecom.cn>
>
> introduce util functions to setup the DIRTY_MEMORY_DIRTY_RATE
> dirty bits for the convenience of tracking dirty bitmap when
> calculating dirtyrate.
>
> Signed-off-by: Hyman Huang(黄勇) <huang...@chinatelecom.cn>
> ---
>  include/exec/ram_addr.h | 121 ++++++++++++++++++++++++++++++++++++++++++++++++
>  softmmu/physmem.c       |  61 ++++++++++++++++++++++++
>  2 files changed, 182 insertions(+)
>
> diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
> index 6070a52..57dc96b 100644
> --- a/include/exec/ram_addr.h
> +++ b/include/exec/ram_addr.h
> @@ -435,6 +435,12 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
>                                                ram_addr_t length,
>                                                unsigned client);
>
> +void cpu_physical_memory_dirtyrate_clear_bit(ram_addr_t start,
> +                                             ram_addr_t length);
> +
> +void cpu_physical_memory_dirtyrate_reprotect_bit(ram_addr_t start,
> +                                                 ram_addr_t length);
> +
>  DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
>      (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client);
>
> @@ -523,5 +529,120 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
>
>      return num_dirty;
>  }
> +
> +/* Called with RCU critical section */
> +static inline
> +void cpu_physical_memory_dirtyrate_clear_dirty_bits(RAMBlock *rb)
> +{
> +    ram_addr_t addr;
> +    ram_addr_t length = rb->used_length;
> +    unsigned long word = BIT_WORD(rb->offset >> TARGET_PAGE_BITS);
> +
> +    /* start address and length is aligned at the start of a word? */
> +    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) == rb->offset &&
> +        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
> +        int k;
> +        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
> +        unsigned long * const *src;
> +        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
> +        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
> +                                        DIRTY_MEMORY_BLOCK_SIZE);
> +
> +        src = qatomic_rcu_read(
> +                &ram_list.dirty_memory[DIRTY_MEMORY_DIRTY_RATE])->blocks;
> +
> +        for (k = 0; k < nr; k++) {
> +            if (src[idx][offset]) {
> +                qatomic_set(&src[idx][offset], 0);
> +            }
> +            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
> +                offset = 0;
> +                idx++;
> +            }
> +        }
> +    } else {
> +        ram_addr_t offset = rb->offset;
> +
> +        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
> +            cpu_physical_memory_dirtyrate_clear_bit(addr + offset,
> +                                                    TARGET_PAGE_SIZE);
> +        }
> +    }
> +
> +    return;
> +}
> +
> +/* Called with RCU critical section */
> +static inline
> +uint64_t cpu_physical_memory_dirtyrate_stat_dirty_bits(RAMBlock *rb)
> +{
> +    uint64_t dirty_pages = 0;
> +    ram_addr_t addr;
> +    ram_addr_t length = rb->used_length;
> +    unsigned long word = BIT_WORD(rb->offset >> TARGET_PAGE_BITS);
> +    unsigned long bits;
> +
> +    /* start address and length is aligned at the start of a word? */
> +    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) == rb->offset &&
> +        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
> +        int k;
> +        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
> +        unsigned long * const *src;
> +        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
> +        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
> +                                        DIRTY_MEMORY_BLOCK_SIZE);
> +
> +        src = qatomic_rcu_read(
> +                &ram_list.dirty_memory[DIRTY_MEMORY_DIRTY_RATE])->blocks;
> +
> +        for (k = 0; k < nr; k++) {
> +            if (src[idx][offset]) {
> +                bits = qatomic_read(&src[idx][offset]);
> +                dirty_pages += ctpopl(bits);
> +            }
> +
> +            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
> +                offset = 0;
> +                idx++;
> +            }
> +        }
> +    } else {
> +        ram_addr_t offset = rb->offset;
> +
> +        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
> +            if (cpu_physical_memory_get_dirty(offset + addr,
> +                                              TARGET_PAGE_SIZE,
> +                                              DIRTY_MEMORY_DIRTY_RATE)) {
> +                dirty_pages++;
> +            }
> +        }
> +    }
> +
> +    return dirty_pages;
> +}
If my understanding in the cover letter was correct, all the code up to here
can be dropped if we go without the extra bitmap.

> +
> +static inline
> +void cpu_physical_memory_dirtyrate_reset_protect(RAMBlock *rb)
> +{
> +    ram_addr_t addr;
> +    ram_addr_t length = rb->used_length;
> +    unsigned long word = BIT_WORD(rb->offset >> TARGET_PAGE_BITS);
> +
> +    /* start address and length is aligned at the start of a word? */
> +    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) == rb->offset &&
> +        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
> +        memory_region_clear_dirty_bitmap(rb->mr, 0, length);
> +    } else {
> +        ram_addr_t offset = rb->offset;
> +
> +        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
> +            cpu_physical_memory_dirtyrate_reprotect_bit(offset + addr,
> +                                                        TARGET_PAGE_SIZE);
> +        }
> +    }
> +
> +    return;
> +}

I'm confused why we need this complexity.  Can we unconditionally do:

static inline
void cpu_physical_memory_dirtyrate_reset_protect(RAMBlock *rb)
{
    memory_region_clear_dirty_bitmap(rb->mr, 0, rb->used_length);
}

? Then we could even drop the helper, maybe?

The functions below also seem to be droppable if we go without the dirty
rate bitmap.  Then, maybe, this patch is not needed at all.

> +
>  #endif
>  #endif
> diff --git a/softmmu/physmem.c b/softmmu/physmem.c
> index 9b171c9..d68649a 100644
> --- a/softmmu/physmem.c
> +++ b/softmmu/physmem.c
> @@ -1068,6 +1068,67 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
>      return dirty;
>  }
>
> +void cpu_physical_memory_dirtyrate_clear_bit(ram_addr_t start,
> +                                             ram_addr_t length)
> +{
> +    DirtyMemoryBlocks *blocks;
> +    unsigned long end, page;
> +    RAMBlock *ramblock;
> +
> +    if (length == 0) {
> +        return;
> +    }
> +
> +    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
> +    page = start >> TARGET_PAGE_BITS;
> +
> +    WITH_RCU_READ_LOCK_GUARD() {
> +        blocks =
> +            qatomic_rcu_read(&ram_list.dirty_memory[DIRTY_MEMORY_DIRTY_RATE]);
> +        ramblock = qemu_get_ram_block(start);
> +        /* Range sanity check on the ramblock */
> +        assert(start >= ramblock->offset &&
> +               start + length <= ramblock->offset + ramblock->used_length);
> +        while (page < end) {
> +            unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
> +            unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
> +            unsigned long num = MIN(end - page,
> +                                    DIRTY_MEMORY_BLOCK_SIZE - offset);
> +
> +            clear_bit(num, blocks->blocks[idx]);
> +            page += num;
> +        }
> +    }
> +
> +    return;
> +}
> +
> +void cpu_physical_memory_dirtyrate_reprotect_bit(ram_addr_t start,
> +                                                 ram_addr_t length)
> +{
> +    unsigned long end, start_page;
> +    RAMBlock *ramblock;
> +    uint64_t mr_offset, mr_size;
> +
> +    if (length == 0) {
> +        return;
> +    }
> +
> +    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
> +    start_page = start >> TARGET_PAGE_BITS;
> +
> +    ramblock = qemu_get_ram_block(start);
> +    /* Range sanity check on the ramblock */
> +    assert(start >= ramblock->offset &&
> +           start + length <= ramblock->offset + ramblock->used_length);
> +
> +    mr_offset = (ram_addr_t)(start_page << TARGET_PAGE_BITS) - ramblock->offset;
> +    mr_size = (end - start_page) << TARGET_PAGE_BITS;
> +    memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size);
> +
> +    return;
> +}
> +
>  DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
>      (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client)
>  {
> --
> 1.8.3.1
>

-- 
Peter Xu
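
For illustration, a minimal sketch of the "go without the extra bitmap"
direction discussed above: count a RAMBlock's dirty pages against the
existing DIRTY_MEMORY_MIGRATION client instead of a dedicated
DIRTY_MEMORY_DIRTY_RATE one.  The helper name here is hypothetical; it only
assumes cpu_physical_memory_get_dirty() from include/exec/ram_addr.h and the
RAMBlock fields already used in the patch, and it would live next to the
other inline helpers in that header:

/*
 * Minimal sketch (hypothetical helper): count dirty pages of one RAMBlock
 * by walking the existing DIRTY_MEMORY_MIGRATION client, so no extra
 * dirty-rate bitmap is needed.  Call under the RCU read lock, like the
 * helpers quoted above.
 */
static inline uint64_t dirtyrate_count_dirty_pages(RAMBlock *rb)
{
    uint64_t dirty_pages = 0;
    ram_addr_t addr;

    /* Walk the block page by page and test the per-page dirty bit. */
    for (addr = 0; addr < rb->used_length; addr += TARGET_PAGE_SIZE) {
        if (cpu_physical_memory_get_dirty(rb->offset + addr,
                                          TARGET_PAGE_SIZE,
                                          DIRTY_MEMORY_MIGRATION)) {
            dirty_pages++;
        }
    }

    return dirty_pages;
}

Whether this is acceptable depends on how the measurement interacts with an
ongoing migration, since both would then consume the same dirty bits.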