* Wei Yang (richardw.y...@linux.intel.com) wrote: > This patch enable compress with postcopy. > > This is a RFC and based on some unmerged patch > > "migration: extract ram_load_precopy" > "migration/postcopy: skip compression when postcopy is active" > > Signed-off-by: Wei Yang <richardw.y...@linux.intel.com> > --- > migration/postcopy-ram.c | 3 +-- > migration/ram.c | 35 +++++++++++++++++++++-------------- > 2 files changed, 22 insertions(+), 16 deletions(-) > > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c > index a7e7ec9c22..70b6beb5a9 100644 > --- a/migration/postcopy-ram.c > +++ b/migration/postcopy-ram.c > @@ -1252,8 +1252,7 @@ int postcopy_place_page_zero(MigrationIncomingState > *mis, void *host, > } > memset(mis->postcopy_tmp_zero_page, '\0', > mis->largest_page_size); > } > - return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, > - rb); > + return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, > rb);
Please keep these type of cleanups separate. > } > } > > diff --git a/migration/ram.c b/migration/ram.c > index a0d3bc60b2..c1d6eadf38 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -2384,16 +2384,6 @@ static bool save_page_use_compression(RAMState *rs) > return false; > } > > - /* > - * The decompression threads asynchronously write into RAM > - * rather than use the atomic copies needed to avoid > - * userfaulting. It should be possible to fix the decompression > - * threads for compatibility in future. > - */ > - if (migration_in_postcopy()) { > - return false; > - } > - > /* > * If xbzrle is on, stop using the data compression after first > * round of migration even if compression is enabled. In theory, > @@ -3433,6 +3423,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) > } > i++; > } > + > + if (migrate_postcopy_ram()) { > + flush_compressed_data(rs); > + } > + > rcu_read_unlock(); > > /* > @@ -4019,6 +4014,7 @@ static int ram_load_postcopy(QEMUFile *f) > void *place_source = NULL; > RAMBlock *block = NULL; > uint8_t ch; > + int len; > > addr = qemu_get_be64(f); > > @@ -4036,7 +4032,8 @@ static int ram_load_postcopy(QEMUFile *f) > > trace_ram_load_postcopy_loop((uint64_t)addr, flags); > place_needed = false; > - if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) { > + if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE | > + RAM_SAVE_FLAG_COMPRESS_PAGE)) { > block = ram_block_from_stream(f, flags); > > host = host_from_ram_block_offset(block, addr); > @@ -4109,6 +4106,17 @@ static int ram_load_postcopy(QEMUFile *f) > TARGET_PAGE_SIZE); > } > break; > + case RAM_SAVE_FLAG_COMPRESS_PAGE: > + all_zero = false; > + len = qemu_get_be32(f); > + if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) { > + error_report("Invalid compressed data length: %d", len); > + ret = -EINVAL; > + break; > + } > + decompress_data_with_multi_threads(f, page_buffer, len); > + ret |= wait_for_decompress_done(); I think this might work for a 4k page host; but I'm not sure it's safe on hugepages or ARM/Power where they have bigger pages. ram_load_postcopy relies on all of the pages within a single hostpage arriving before the last subpage and that's what then triggers the call to postcopy_place_page; that relies on some ordering - but I don't think that the multiple compress threads on the source have any ordering between the threads - or am I missing something about how the multiple threads are organised? Dave > + break; > case RAM_SAVE_FLAG_EOS: > /* normal exit */ > multifd_recv_sync_main(); > @@ -4130,8 +4138,7 @@ static int ram_load_postcopy(QEMUFile *f) > void *place_dest = host + TARGET_PAGE_SIZE - block->page_size; > > if (all_zero) { > - ret = postcopy_place_page_zero(mis, place_dest, > - block); > + ret = postcopy_place_page_zero(mis, place_dest, block); > } else { > ret = postcopy_place_page(mis, place_dest, > place_source, block); > @@ -4372,6 +4379,7 @@ static int ram_load_precopy(QEMUFile *f) > } > } > > + ret |= wait_for_decompress_done(); > return ret; > } > > @@ -4405,7 +4413,6 @@ static int ram_load(QEMUFile *f, void *opaque, int > version_id) > ret = ram_load_precopy(f); > } > > - ret |= wait_for_decompress_done(); > rcu_read_unlock(); > trace_ram_load_complete(ret, seq_iter); > > -- > 2.17.1 > -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK