* Dr. David Alan Gilbert (dgilb...@redhat.com) wrote:
> * Pavel Butsykin (pbutsy...@virtuozzo.com) wrote:
> > This feature frees the migrated memory on the source during postcopy-ram
> > migration. In the second stage of postcopy-ram migration, when the source
> > VM has been paused, we can free the memory that has already been migrated.
> > In particular, this allows the memory pressure on the source host to start
> > easing in a load-balancing scenario.
> >
> > Signed-off-by: Pavel Butsykin <pbutsy...@virtuozzo.com>
> > Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
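(For anyone who wants to try the series: release-ram is a plain migration
capability, so on the source it would presumably be switched on next to
postcopy-ram before the migration starts. A sketch of the QMP exchange,
assuming the capability name from the qapi-schema.json hunk below and the
existing migrate-set-capabilities / migrate / migrate-start-postcopy
commands; "tcp:dst-host:4444" is a made-up destination:

  { "execute": "migrate-set-capabilities",
    "arguments": { "capabilities": [
      { "capability": "postcopy-ram", "state": true },
      { "capability": "release-ram",  "state": true } ] } }
  { "execute": "migrate", "arguments": { "uri": "tcp:dst-host:4444" } }
  { "execute": "migrate-start-postcopy" }

Note the pages are only freed once migrate-start-postcopy has switched the
source into the postcopy stage, per the migration_in_postcopy() checks in
the patch.)
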
Actually, note the error from patchew; you need to fix up the error
reports that print iov_len with %ld to use %zd, I think.

Dave

> > ---
> >  include/migration/migration.h |  1 +
> >  include/migration/qemu-file.h |  3 ++-
> >  migration/migration.c         |  9 +++++++
> >  migration/qemu-file.c         | 59 ++++++++++++++++++++++++++++++++++++++-----
> >  migration/ram.c               | 22 +++++++++++++++-
> >  qapi-schema.json              |  5 +++-
> >  6 files changed, 89 insertions(+), 10 deletions(-)
> >
> > diff --git a/include/migration/migration.h b/include/migration/migration.h
> > index bd399fc0df..401fbe1f77 100644
> > --- a/include/migration/migration.h
> > +++ b/include/migration/migration.h
> > @@ -307,6 +307,7 @@ int migrate_add_blocker(Error *reason, Error **errp);
> >   */
> >  void migrate_del_blocker(Error *reason);
> >
> > +bool migrate_release_ram(void);
> >  bool migrate_postcopy_ram(void);
> >  bool migrate_zero_blocks(void);
> >
> > diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
> > index abedd466c9..0cd648a733 100644
> > --- a/include/migration/qemu-file.h
> > +++ b/include/migration/qemu-file.h
> > @@ -132,7 +132,8 @@ void qemu_put_byte(QEMUFile *f, int v);
> >   * put_buffer without copying the buffer.
> >   * The buffer should be available till it is sent asynchronously.
> >   */
> > -void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size);
> > +void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size,
> > +                           bool may_free);
> >  bool qemu_file_mode_is_not_valid(const char *mode);
> >  bool qemu_file_is_writable(QEMUFile *f);
> >
> > diff --git a/migration/migration.c b/migration/migration.c
> > index 1ae68be0c7..8d5a5f8a6e 100644
> > --- a/migration/migration.c
> > +++ b/migration/migration.c
> > @@ -1302,6 +1302,15 @@ void qmp_migrate_set_downtime(double value, Error **errp)
> >      qmp_migrate_set_parameters(&p, errp);
> >  }
> >
> > +bool migrate_release_ram(void)
> > +{
> > +    MigrationState *s;
> > +
> > +    s = migrate_get_current();
> > +
> > +    return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
> > +}
> > +
> >  bool migrate_postcopy_ram(void)
> >  {
> >      MigrationState *s;
> > diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> > index e9fae31158..82dbef3c86 100644
> > --- a/migration/qemu-file.c
> > +++ b/migration/qemu-file.c
> > @@ -49,6 +49,7 @@ struct QEMUFile {
> >      int buf_size; /* 0 when writing */
> >      uint8_t buf[IO_BUF_SIZE];
> >
> > +    DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
> >      struct iovec iov[MAX_IOV_SIZE];
> >      unsigned int iovcnt;
> >
> > @@ -132,6 +133,41 @@ bool qemu_file_is_writable(QEMUFile *f)
> >      return f->ops->writev_buffer;
> >  }
> >
> > +static void qemu_iovec_release_ram(QEMUFile *f)
> > +{
> > +    struct iovec iov;
> > +    unsigned long idx;
> > +
> > +    /* Find and release all the contiguous memory ranges marked as may_free. */
> > +    idx = find_next_bit(f->may_free, f->iovcnt, 0);
> > +    if (idx >= f->iovcnt) {
> > +        return;
> > +    }
> > +    iov = f->iov[idx];
> > +
> > +    /* The madvise() in the loop is called for each contiguous range of iovs,
> > +     * after which iov is reinitialized to the next range. Finally, madvise()
> > +     * is called for the last range.
> > +     */
> > +    while ((idx = find_next_bit(f->may_free, f->iovcnt, idx + 1)) < f->iovcnt) {
> > +        /* check for adjacent buffer and coalesce them */
> > +        if (iov.iov_base + iov.iov_len == f->iov[idx].iov_base) {
> > +            iov.iov_len += f->iov[idx].iov_len;
> > +            continue;
> > +        }
> > +        if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) {
> > +            error_report("migrate: madvise DONTNEED failed %p %ld: %s",
> > +                         iov.iov_base, iov.iov_len, strerror(errno));
> > +        }
> > +        iov = f->iov[idx];
> > +    }
> > +    if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) {
> > +        error_report("migrate: madvise DONTNEED failed %p %ld: %s",
> > +                     iov.iov_base, iov.iov_len, strerror(errno));
> > +    }
> > +    memset(f->may_free, 0, sizeof(f->may_free));
> > +}
> > +
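(The patchew failure mentioned at the top is in the two error_report()
calls just quoted: iov.iov_len is a size_t, not a long, so "%ld" trips
-Wformat, and hence -Werror, on hosts where the two types differ. The
fixed-up call would presumably read as follows; "%zu" would do equally
well, since iov_len is unsigned:

        error_report("migrate: madvise DONTNEED failed %p %zd: %s",
                     iov.iov_base, iov.iov_len, strerror(errno));
)
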
> >  /**
> >   * Flushes QEMUFile buffer
> >   *
> > @@ -151,6 +187,8 @@ void qemu_fflush(QEMUFile *f)
> >      if (f->iovcnt > 0) {
> >          expect = iov_size(f->iov, f->iovcnt);
> >          ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos);
> > +
> > +        qemu_iovec_release_ram(f);
> >      }
> >
> >      if (ret >= 0) {
> > @@ -304,13 +342,19 @@ int qemu_fclose(QEMUFile *f)
> >      return ret;
> >  }
> >
> > -static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size)
> > +static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size,
> > +                         bool may_free)
> >  {
> >      /* check for adjacent buffer and coalesce them */
> >      if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base +
> > -        f->iov[f->iovcnt - 1].iov_len) {
> > +        f->iov[f->iovcnt - 1].iov_len &&
> > +        may_free == test_bit(f->iovcnt - 1, f->may_free))
> > +    {
> >          f->iov[f->iovcnt - 1].iov_len += size;
> >      } else {
> > +        if (may_free) {
> > +            set_bit(f->iovcnt, f->may_free);
> > +        }
> >          f->iov[f->iovcnt].iov_base = (uint8_t *)buf;
> >          f->iov[f->iovcnt++].iov_len = size;
> >      }
> > @@ -320,14 +364,15 @@ static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size)
> >      }
> >  }
> >
> > -void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size)
> > +void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size,
> > +                           bool may_free)
> >  {
> >      if (f->last_error) {
> >          return;
> >      }
> >
> >      f->bytes_xfer += size;
> > -    add_to_iovec(f, buf, size);
> > +    add_to_iovec(f, buf, size, may_free);
> >  }
> >
> >  void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
> > @@ -345,7 +390,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
> >          }
> >          memcpy(f->buf + f->buf_index, buf, l);
> >          f->bytes_xfer += l;
> > -        add_to_iovec(f, f->buf + f->buf_index, l);
> > +        add_to_iovec(f, f->buf + f->buf_index, l, false);
> >          f->buf_index += l;
> >          if (f->buf_index == IO_BUF_SIZE) {
> >              qemu_fflush(f);
> > @@ -366,7 +411,7 @@ void qemu_put_byte(QEMUFile *f, int v)
> >
> >      f->buf[f->buf_index] = v;
> >      f->bytes_xfer++;
> > -    add_to_iovec(f, f->buf + f->buf_index, 1);
> > +    add_to_iovec(f, f->buf + f->buf_index, 1, false);
> >      f->buf_index++;
> >      if (f->buf_index == IO_BUF_SIZE) {
> >          qemu_fflush(f);
> > @@ -647,7 +692,7 @@ ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size,
> >      }
> >      qemu_put_be32(f, blen);
> >      if (f->ops->writev_buffer) {
> > -        add_to_iovec(f, f->buf + f->buf_index, blen);
> > +        add_to_iovec(f, f->buf + f->buf_index, blen, false);
> >      }
> >      f->buf_index += blen;
> >      if (f->buf_index == IO_BUF_SIZE) {
> >          qemu_fflush(f);
> > diff --git a/migration/ram.c b/migration/ram.c
> > index d866b6518b..5a43f716d1 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -726,6 +726,16 @@ static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
> >      return pages;
> >  }
> >
> > +static void ram_release_pages(MigrationState *ms, const char *block_name,
> > +                              uint64_t offset, int pages)
> > +{
> > +    if (!migrate_release_ram() || !migration_in_postcopy(ms)) {
> > +        return;
> > +    }
> > +
> > +    ram_discard_range(NULL, block_name, offset, pages << TARGET_PAGE_BITS);
> > +}
> > +
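(An aside on the mechanism, since it is the heart of the series: if I am
reading it right, for anonymous guest RAM ram_discard_range() comes down
to the same madvise(MADV_DONTNEED) that qemu_iovec_release_ram() issues on
the send buffers above, and "pages << TARGET_PAGE_BITS" just turns the page
count into a byte length. A minimal standalone sketch of the Linux kernel
behaviour being relied on - not QEMU code:

  #include <stdio.h>
  #include <string.h>
  #include <sys/mman.h>

  int main(void)
  {
      size_t len = 2 * 1024 * 1024;
      char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (p == MAP_FAILED) {
          return 1;
      }
      memset(p, 0xa5, len);           /* fault in and dirty every page */
      madvise(p, len, MADV_DONTNEED); /* kernel frees the backing pages */
      printf("%d\n", p[0]);           /* prints 0: re-read faults in a zero page */
      return 0;
  }

So once the source has entered postcopy, every page queued for sending with
may_free=true stops costing resident memory as soon as its buffer is
flushed.)
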
> >  /**
> >   * ram_save_page: Send the given page to the stream
> >   *
> > @@ -786,6 +796,7 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
> >               * page would be stale
> >               */
> >              xbzrle_cache_zero_page(current_addr);
> > +            ram_release_pages(ms, block->idstr, pss->offset, pages);
> >          } else if (!ram_bulk_stage &&
> >                     !migration_in_postcopy(ms) && migrate_use_xbzrle()) {
> >              pages = save_xbzrle_page(f, &p, current_addr, block,
> > @@ -804,7 +815,9 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
> >              *bytes_transferred += save_page_header(f, block,
> >                                     offset | RAM_SAVE_FLAG_PAGE);
> >              if (send_async) {
> > -                qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
> > +                qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
> > +                                      migrate_release_ram() &
> > +                                      migration_in_postcopy(ms));
> >              } else {
> >                  qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
> >              }
> > @@ -834,6 +847,8 @@ static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
> >          error_report("compressed data failed!");
> >      } else {
> >          bytes_sent += blen;
> > +        ram_release_pages(migrate_get_current(), block->idstr,
> > +                          offset & TARGET_PAGE_MASK, 1);
> >      }
> >
> >      return bytes_sent;
> > @@ -973,12 +988,17 @@ static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f,
> >                  error_report("compressed data failed!");
> >              }
> >          }
> > +        if (pages > 0) {
> > +            ram_release_pages(ms, block->idstr, pss->offset, pages);
> > +        }
> >      } else {
> >          offset |= RAM_SAVE_FLAG_CONTINUE;
> >          pages = save_zero_page(f, block, offset, p, bytes_transferred);
> >          if (pages == -1) {
> >              pages = compress_page_with_multi_thread(f, block, offset,
> >                                                      bytes_transferred);
> > +        } else {
> > +            ram_release_pages(ms, block->idstr, pss->offset, pages);
> >          }
> >      }
> >  }
> > diff --git a/qapi-schema.json b/qapi-schema.json
> > index 82fabc6e24..e58228d083 100644
> > --- a/qapi-schema.json
> > +++ b/qapi-schema.json
> > @@ -865,11 +865,14 @@
> >  # side, this process is called COarse-Grain LOck Stepping (COLO) for
> >  # Non-stop Service. (since 2.8)
> >  #
> > +# @release-ram: if enabled, qemu will free the migrated ram pages on the
> > +#               source during postcopy-ram migration. (since 2.9)
> > +#
> >  # Since: 1.2
> >  ##
> >  { 'enum': 'MigrationCapability',
> >    'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
> > -           'compress', 'events', 'postcopy-ram', 'x-colo'] }
> > +           'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram'] }
> >
> >  ##
> >  # @MigrationCapabilityStatus:
> > --
> > 2.11.0
> >
> >
>
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK