In QEMU's migration/options.c, a maximum of 2000 seconds is defined for the 'downtime-limit' migration parameter. Previously, there would be warnings and errors for non-converging migrations:
> Parameter 'downtime_limit' expects an integer in the range of 0 to (2000 *
> 1000) ms
and at some point later:
> qmp command 'migrate-set-parameters' failed - Parameter 'downtime-limit'
> expects uint64

Signed-off-by: Fiona Ebner <[email protected]>
---
 src/PVE/QemuMigrate.pm | 57 +++++++++++++++++++++++++++++++-----------
 src/PVE/QemuServer.pm  |  2 +-
 2 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/src/PVE/QemuMigrate.pm b/src/PVE/QemuMigrate.pm
index 6f7fd9f1..57c72279 100644
--- a/src/PVE/QemuMigrate.pm
+++ b/src/PVE/QemuMigrate.pm
@@ -1150,6 +1150,46 @@ sub phase2_start_remote_cluster {
     return ($res->{migrate}, $res->{spice_port});
 }
 
+my $migrate_downtime_max = 2000 * 1000; # as defined in QEMU's migration/options.c
+
+my sub cap_migrate_downtime {
+    my ($self, $migrate_downtime) = @_;
+
+    if ($migrate_downtime > $migrate_downtime_max) {
+        $self->log('info', "capping downtime limit to maximum possible: $migrate_downtime_max ms");
+        return $migrate_downtime_max;
+    }
+
+    return $migrate_downtime;
+}
+
+my sub increase_migrate_downtime {
+    my ($self, $vmid, $migrate_downtime) = @_;
+
+    return $migrate_downtime_max if $migrate_downtime >= $migrate_downtime_max;
+
+    $migrate_downtime *= 2;
+
+    $migrate_downtime = cap_migrate_downtime($self, $migrate_downtime);
+
+    $self->log(
+        'info',
+        "auto-increased downtime to continue migration: $migrate_downtime ms",
+    );
+    eval {
+        # migrate-set-parameters does not touch values not
+        # specified, so this only changes downtime-limit
+        mon_cmd(
+            $vmid,
+            "migrate-set-parameters",
+            'downtime-limit' => int($migrate_downtime),
+        );
+    };
+    $self->log('info', "migrate-set-parameters error: $@") if $@;
+
+    return $migrate_downtime;
+}
+
 sub phase2 {
     my ($self, $vmid) = @_;
 
@@ -1313,6 +1353,7 @@ sub phase2 {
     $migrate_downtime = $conf->{migrate_downtime} if defined($conf->{migrate_downtime});
     # migrate-set-parameters expects limit in ms
     $migrate_downtime *= 1000;
+    $migrate_downtime = cap_migrate_downtime($self, $migrate_downtime);
     $self->log('info', "migration downtime limit: $migrate_downtime ms");
     $qemu_migrate_params->{'downtime-limit'} = int($migrate_downtime);
 
@@ -1493,21 +1534,7 @@ sub phase2 {
 
             if ($downtimecounter > 5) {
                 $downtimecounter = 0;
-                $migrate_downtime *= 2;
-                $self->log(
-                    'info',
-                    "auto-increased downtime to continue migration: $migrate_downtime ms",
-                );
-                eval {
-                    # migrate-set-parameters does not touch values not
-                    # specified, so this only changes downtime-limit
-                    mon_cmd(
-                        $vmid,
-                        "migrate-set-parameters",
-                        'downtime-limit' => int($migrate_downtime),
-                    );
-                };
-                $self->log('info', "migrate-set-parameters error: $@") if $@;
+                $migrate_downtime = increase_migrate_downtime($self, $vmid, $migrate_downtime);
             }
         }
 
diff --git a/src/PVE/QemuServer.pm b/src/PVE/QemuServer.pm
index 4e406386..a7fbec14 100644
--- a/src/PVE/QemuServer.pm
+++ b/src/PVE/QemuServer.pm
@@ -564,7 +564,7 @@ EODESCR
         description => "Set maximum tolerated downtime (in seconds) for migrations. Should the"
             . " migration not be able to converge in the very end, because too much newly dirtied"
             . " RAM needs to be transferred, the limit will be increased automatically step-by-step"
-            . " until migration can converge.",
+            . " until migration can converge. Will be capped to 2000 seconds (maximum in QEMU).",
         minimum => 0,
         default => 0.1,
     },
-- 
2.47.3


_______________________________________________
pve-devel mailing list
[email protected]
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
