To reduce the guest's downtime during checkpoint, migrate as many
dirty RAM pages as possible before the COLO checkpoint. If the
iteration count reaches COLO_RAM_MIGRATE_ITERATION_MAX, or the pending
RAM size drops below 'x-colo-migrate-ram-threshold', stop the RAM
migration and take the COLO checkpoint.
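
As a usage sketch (the value below is only illustrative; the default
threshold is 100 MiB), the threshold can be tuned at runtime through
the existing migration parameter interfaces, e.g. via HMP:

  (qemu) migrate_set_parameter x-colo-migrate-ram-threshold 104857600

or via QMP:

  { "execute": "migrate-set-parameters",
    "arguments": { "x-colo-migrate-ram-threshold": 104857600 } }

Setting the threshold to 0 skips the pre-checkpoint RAM migration
entirely.
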
Signed-off-by: Derek Su <dere...@qnap.com>
---
migration/colo.c | 79 ++++++++++++++++++++++++++++++++++++++++++
migration/migration.c | 20 +++++++++++
migration/trace-events | 2 ++
monitor/hmp-cmds.c | 8 +++++
qapi/migration.json | 18 ++++++++--
5 files changed, 125 insertions(+), 2 deletions(-)
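
A rough sketch of the resulting flow (message and function names are
those used in the diff below; the loop bound is
COLO_RAM_MIGRATE_ITERATION_MAX):

  Primary (PVM)                             Secondary (SVM)
  -------------                             ---------------
  colo_migrate_ram_before_checkpoint():
    while pending ram >= threshold and
          iterations < COLO_RAM_MIGRATE_ITERATION_MAX:
      COLO_MESSAGE_MIGRATE_RAM       ---->  colo_wait_handle_message()
      ram iteration + QEMU_VM_EOF    ---->  qemu_loadvm_state_main()
  COLO_MESSAGE_CHECKPOINT_REQUEST    ---->  checkpoint as before
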
diff --git a/migration/colo.c b/migration/colo.c
index ea7d1e9d4e..a0c71d7c56 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -46,6 +46,13 @@ static Notifier packets_compare_notifier;
static COLOMode last_colo_mode;
#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
+#define COLO_RAM_MIGRATE_ITERATION_MAX 10
+
+typedef enum {
+ COLO_MIG_ITERATE_RESUME = 0, /* Resume migration iteration */
+ COLO_MIG_ITERATE_BREAK = 1, /* Break migration loop */
+} COLOMigIterateState;
+
bool migration_in_colo_state(void)
{
@@ -398,6 +405,68 @@ static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg,
return value;
}
+static int colo_migrate_ram_iteration(MigrationState *s, Error **errp)
+{
+ int64_t threshold_size = s->parameters.x_colo_migrate_ram_threshold;
+ uint64_t pending_size, pend_pre, pend_compat, pend_post;
+ Error *local_err = NULL;
+ int ret;
+
+ if (threshold_size == 0) {
+ return COLO_MIG_ITERATE_BREAK;
+ }
+
+ qemu_savevm_state_pending(s->to_dst_file, threshold_size,
+ &pend_pre, &pend_compat, &pend_post);
+ pending_size = pend_pre + pend_compat + pend_post;
+
+ trace_colo_migrate_pending(pending_size, threshold_size,
+ pend_pre, pend_compat, pend_post);
+
+ /* Still a significant amount to transfer */
+ if (pending_size && pending_size >= threshold_size) {
+ colo_send_message(s->to_dst_file, COLO_MESSAGE_MIGRATE_RAM,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return COLO_MIG_ITERATE_BREAK;
+ }
+
+ qemu_savevm_state_iterate(s->to_dst_file, false);
+ qemu_put_byte(s->to_dst_file, QEMU_VM_EOF);
+
+ ret = qemu_file_get_error(s->to_dst_file);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to send dirty pages");
+ return COLO_MIG_ITERATE_BREAK;
+ }
+
+ return COLO_MIG_ITERATE_RESUME;
+ }
+
+ trace_colo_migration_low_pending(pending_size);
+
+ return COLO_MIG_ITERATE_BREAK;
+}
+
+static void colo_migrate_ram_before_checkpoint(MigrationState *s, Error **errp)
+{
+ Error *local_err = NULL;
+ int iterate_count = 0;
+
+ while (iterate_count++ < COLO_RAM_MIGRATE_ITERATION_MAX) {
+ COLOMigIterateState state;
+
+ state = colo_migrate_ram_iteration(s, &local_err);
+ if (state == COLO_MIG_ITERATE_BREAK) {
+ if (local_err) {
+ error_propagate(errp, local_err);
+ }
+
+ return;
+ }
+ }
+}
+
static int colo_do_checkpoint_transaction(MigrationState *s,
QIOChannelBuffer *bioc,
QEMUFile *fb)
@@ -405,6 +474,11 @@ static int colo_do_checkpoint_transaction(MigrationState *s,
Error *local_err = NULL;
int ret = -1;
+ colo_migrate_ram_before_checkpoint(s, &local_err);
+ if (local_err) {
+ goto out;
+ }
+
colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST,
&local_err);
if (local_err) {
@@ -819,6 +893,11 @@ static void colo_wait_handle_message(MigrationIncomingState *mis,
case COLO_MESSAGE_CHECKPOINT_REQUEST:
colo_incoming_process_checkpoint(mis, fb, bioc, errp);
break;
+ case COLO_MESSAGE_MIGRATE_RAM:
+ if (qemu_loadvm_state_main(mis->from_src_file, mis) < 0) {
+ error_setg(errp, "Load ram failed");
+ }
+ break;
default:
error_setg(errp, "Got unknown COLO message: %d", msg);
break;
diff --git a/migration/migration.c b/migration/migration.c
index 481a590f72..390937ae5d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -88,6 +88,7 @@
/* The delay time (in ms) between two COLO checkpoints */
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
+#define DEFAULT_COLO_MIGRATE_RAM_THRESHOLD (100 * 1024 * 1024UL)
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE
/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */
@@ -800,6 +801,9 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
params->downtime_limit = s->parameters.downtime_limit;
params->has_x_checkpoint_delay = true;
params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
+ params->has_x_colo_migrate_ram_threshold = true;
+ params->x_colo_migrate_ram_threshold =
+ s->parameters.x_colo_migrate_ram_threshold;
params->has_block_incremental = true;
params->block_incremental = s->parameters.block_incremental;
params->has_multifd_channels = true;
@@ -1223,6 +1227,8 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
/* x_checkpoint_delay is now always positive */
+ /* x_colo_migrate_ram_threshold is zero or positive */
+
if (params->has_multifd_channels && (params->multifd_channels < 1)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
"multifd_channels",
@@ -1356,6 +1362,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
dest->x_checkpoint_delay = params->x_checkpoint_delay;
}
+ if (params->has_x_colo_migrate_ram_threshold) {
+ dest->x_colo_migrate_ram_threshold =
+ params->x_colo_migrate_ram_threshold;
+ }
+
if (params->has_block_incremental) {
dest->block_incremental = params->block_incremental;
}
@@ -1463,6 +1474,11 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
}
}
+ if (params->has_x_colo_migrate_ram_threshold) {
+ s->parameters.x_colo_migrate_ram_threshold =
+ params->x_colo_migrate_ram_threshold;
+ }
+
if (params->has_block_incremental) {
s->parameters.block_incremental = params->block_incremental;
}
@@ -3622,6 +3638,9 @@ static Property migration_properties[] = {
DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState,
parameters.x_checkpoint_delay,
DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
+ DEFINE_PROP_UINT64("x-colo-migrate-ram-threshold", MigrationState,
+ parameters.x_colo_migrate_ram_threshold,
+ DEFAULT_COLO_MIGRATE_RAM_THRESHOLD),
DEFINE_PROP_UINT8("multifd-channels", MigrationState,
parameters.multifd_channels,
DEFAULT_MIGRATE_MULTIFD_CHANNELS),
@@ -3724,6 +3743,7 @@ static void migration_instance_init(Object *obj)
params->has_max_bandwidth = true;
params->has_downtime_limit = true;
params->has_x_checkpoint_delay = true;
+ params->has_x_colo_migrate_ram_threshold = true;
params->has_block_incremental = true;
params->has_multifd_channels = true;
params->has_multifd_compression = true;
diff --git a/migration/trace-events b/migration/trace-events
index 4ab0a503d2..32bf42cdb3 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -295,6 +295,8 @@ migration_tls_incoming_handshake_complete(void) ""
colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'"
colo_send_message(const char *msg) "Send '%s' message"
colo_receive_message(const char *msg) "Receive '%s' message"
+colo_migrate_pending(uint64_t size, uint64_t max, uint64_t pre, uint64_t compat, uint64_t post) "pending size %" PRIu64 " max %" PRIu64 " (pre = %" PRIu64 " compat=%" PRIu64 " post=%" PRIu64 ")"
+colo_migration_low_pending(uint64_t pending) "%" PRIu64
# colo-failover.c
colo_failover_set_state(const char *new_state) "new state %s"
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index e03adf0d4d..ebca533960 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -450,6 +450,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
monitor_printf(mon, "%s: %u\n",
MigrationParameter_str(MIGRATION_PARAMETER_X_CHECKPOINT_DELAY),
params->x_checkpoint_delay);
+ assert(params->has_x_colo_migrate_ram_threshold);
+ monitor_printf(mon, "%s: %" PRIu64 "\n",
+ MigrationParameter_str(MIGRATION_PARAMETER_X_COLO_MIGRATE_RAM_THRESHOLD),
+ params->x_colo_migrate_ram_threshold);
assert(params->has_block_incremental);
monitor_printf(mon, "%s: %s\n",
MigrationParameter_str(MIGRATION_PARAMETER_BLOCK_INCREMENTAL),
@@ -1330,6 +1334,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
p->has_x_checkpoint_delay = true;
visit_type_int(v, param, &p->x_checkpoint_delay, &err);
break;
+ case MIGRATION_PARAMETER_X_COLO_MIGRATE_RAM_THRESHOLD:
+ p->has_x_colo_migrate_ram_threshold = true;
+ visit_type_int(v, param, &p->x_colo_migrate_ram_threshold, &err);
+ break;
case MIGRATION_PARAMETER_BLOCK_INCREMENTAL:
p->has_block_incremental = true;
visit_type_bool(v, param, &p->block_incremental, &err);
diff --git a/qapi/migration.json b/qapi/migration.json
index d5000558c6..30576c038c 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -600,6 +600,9 @@
# @x-checkpoint-delay: The delay time (in ms) between two COLO checkpoints in
# periodic mode. (Since 2.8)
#
+# @x-colo-migrate-ram-threshold: the threshold (in bytes) of the COLO ram
+# migration's pending size before COLO checkpoint. (Since 5.0)
+#
# @block-incremental: Affects how much storage is migrated when the
# block migration capability is enabled. When false, the entire
# storage backing chain is migrated into a flattened image at
@@ -651,7 +654,8 @@
'cpu-throttle-initial', 'cpu-throttle-increment',
'cpu-throttle-tailslow',
'tls-creds', 'tls-hostname', 'tls-authz', 'max-bandwidth',
- 'downtime-limit', 'x-checkpoint-delay', 'block-incremental',
+ 'downtime-limit', 'x-checkpoint-delay',
+ 'x-colo-migrate-ram-threshold', 'block-incremental',
'multifd-channels',
'xbzrle-cache-size', 'max-postcopy-bandwidth',
'max-cpu-throttle', 'multifd-compression',
@@ -740,6 +744,9 @@
#
# @x-checkpoint-delay: the delay time between two COLO checkpoints. (Since 2.8)
#
+# @x-colo-migrate-ram-threshold: the threshold in bytes of the COLO ram
+# migration's pending size before COLO checkpoint. (Since 5.0)
+#
# @block-incremental: Affects how much storage is migrated when the
# block migration capability is enabled. When false, the entire
# storage backing chain is migrated into a flattened image at
@@ -804,6 +811,7 @@
'*max-bandwidth': 'int',
'*downtime-limit': 'int',
'*x-checkpoint-delay': 'int',
+ '*x-colo-migrate-ram-threshold': 'int',
'*block-incremental': 'bool',
'*multifd-channels': 'int',
'*xbzrle-cache-size': 'size',
@@ -915,6 +923,9 @@
#
# @x-checkpoint-delay: the delay time between two COLO checkpoints. (Since 2.8)
#
+# @x-colo-migrate-ram-threshold: the threshold in bytes of the COLO ram
+# migration's pending size before COLO checkpoint. (Since 5.0)
+#
# @block-incremental: Affects how much storage is migrated when the
# block migration capability is enabled. When false, the entire
# storage backing chain is migrated into a flattened image at
@@ -978,6 +989,7 @@
'*max-bandwidth': 'size',
'*downtime-limit': 'uint64',
'*x-checkpoint-delay': 'uint32',
+ '*x-colo-migrate-ram-threshold': 'uint64',
'*block-incremental': 'bool' ,
'*multifd-channels': 'uint8',
'*xbzrle-cache-size': 'size',
@@ -1116,12 +1128,14 @@
#
# @vmstate-loaded: VM's state has been loaded by SVM.
#
+# @migrate-ram: Send as many dirty pages as possible before the COLO checkpoint.
+#
# Since: 2.8
##
{ 'enum': 'COLOMessage',
'data': [ 'checkpoint-ready', 'checkpoint-request', 'checkpoint-reply',
'vmstate-send', 'vmstate-size', 'vmstate-received',
- 'vmstate-loaded' ] }
+ 'vmstate-loaded', 'migrate-ram' ] }
##
# @COLOMode: