Fixes #5180 [0].

This implements conntrack entry migration for live-migration:
a) the dbus-vmstate helper is started on the target side, together with the VM
b) the dbus-vmstate helper is started on the source side
c) everything is cleaned up properly, in any case

It is currently off by default and must be enabled via the optional
`with-conntrack-state` migration parameter.
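
For example, a migration with conntrack entry migration enabled could then be
started like this (rough usage sketch; assumes the `qm` CLI picks up the new
API parameter added by this patch, VMID and target node are placeholders):

    # <vmid>/<target-node> are placeholders; flag is introduced by this series
    qm migrate <vmid> <target-node> --online 1 --with-conntrack-state 1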

The conntrack entry migration is done in such a way that it can
soft-fail without impacting the actual migration, i.e. it is treated as
"best-effort".

A failed conntrack entry migration does not have any real impact on
functionality, other than possibly exhibiting the problems outlined in
the issue [0].

For remote migrations, only a warning is logged for now. Cross-cluster
migration has stricter requirements and is not a "one-size-fits-all"
case. The most prominent issue is, e.g., differing network segmentation,
which would render the conntrack entries useless or require careful
rewriting.

[0] https://bugzilla.proxmox.com/show_bug.cgi?id=5180

Signed-off-by: Christoph Heiss <c.he...@proxmox.com>
---
Depends on patch #4 to pve-common & a dependency bump of it.
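
For manual testing, the new helper endpoint can also be driven directly, e.g.
(sketch; <vmid> is a placeholder and the VM must be running locally):

    # start/stop the dbus-vmstate helper for a locally running VM
    pvesh create /nodes/localhost/qemu/<vmid>/dbus-vmstate --action start
    pvesh create /nodes/localhost/qemu/<vmid>/dbus-vmstate --action stop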

 PVE/API2/Qemu.pm              |  72 ++++++++++++++++++++
 PVE/CLI/qm.pm                 |   5 ++
 PVE/QemuMigrate.pm            |  64 ++++++++++++++++++
 PVE/QemuServer.pm             |   6 ++
 PVE/QemuServer/DBusVMState.pm | 124 ++++++++++++++++++++++++++++++++++
 PVE/QemuServer/Makefile       |   1 +
 6 files changed, 272 insertions(+)
 create mode 100644 PVE/QemuServer/DBusVMState.pm

diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
index 156b1c7b..4d7b8196 100644
--- a/PVE/API2/Qemu.pm
+++ b/PVE/API2/Qemu.pm
@@ -39,6 +39,7 @@ use PVE::QemuServer::MetaInfo;
 use PVE::QemuServer::PCI;
 use PVE::QemuServer::QMPHelpers;
 use PVE::QemuServer::USB;
+use PVE::QemuServer::DBusVMState;
 use PVE::QemuMigrate;
 use PVE::RPCEnvironment;
 use PVE::AccessControl;
@@ -3035,6 +3036,12 @@ __PACKAGE__->register_method({
                default => 'max(30, vm memory in GiB)',
                optional => 1,
            },
+           'with-conntrack-state' => {
+               type => 'boolean',
+               optional => 1,
+               default => 0,
+               description => 'Whether to migrate conntrack entries for running VMs.',
+           }
        },
     },
     returns => {
@@ -3065,6 +3072,7 @@ __PACKAGE__->register_method({
        my $migration_network = $get_root_param->('migration_network');
        my $targetstorage = $get_root_param->('targetstorage');
        my $force_cpu = $get_root_param->('force-cpu');
+       my $with_conntrack_state = $get_root_param->('with-conntrack-state');
 
        my $storagemap;
 
@@ -3136,6 +3144,7 @@ __PACKAGE__->register_method({
                    nbd_proto_version => $nbd_protocol_version,
                    replicated_volumes => $replicated_volumes,
                    offline_volumes => $offline_volumes,
+                   with_conntrack_state => $with_conntrack_state,
                };
 
                my $params = {
@@ -4675,6 +4684,11 @@ __PACKAGE__->register_method({
                },
                description => "List of mapped resources e.g. pci, usb"
            },
+           'has-dbus-vmstate' => {
+               type => 'boolean',
+               description => 'Whether the VM host supports migrating additional VM state, '
+                   . 'such as conntrack entries.',
+           }
        },
     },
     code => sub {
@@ -4739,6 +4753,7 @@ __PACKAGE__->register_method({
 
        $res->{local_resources} = $local_resources;
        $res->{'mapped-resources'} = $mapped_resources;
+       $res->{'has-dbus-vmstate'} = 1;
 
        return $res;
 
@@ -4800,6 +4815,12 @@ __PACKAGE__->register_method({
                minimum => '0',
                default => 'migrate limit from datacenter or storage config',
            },
+           'with-conntrack-state' => {
+               type => 'boolean',
+               optional => 1,
+               default => 0,
+               description => 'Whether to migrate conntrack entries for running VMs.',
+           }
        },
     },
     returns => {
@@ -4855,6 +4876,7 @@ __PACKAGE__->register_method({
        } else {
            warn "VM isn't running. Doing offline migration instead.\n" if 
$param->{online};
            $param->{online} = 0;
+           $param->{'with-conntrack-state'} = 0;
        }
 
        my $storecfg = PVE::Storage::config();
@@ -6126,6 +6148,7 @@ __PACKAGE__->register_method({
                            warn $@ if $@;
                        }
 
+                       PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($state->{vmid});
                        PVE::QemuServer::destroy_vm($state->{storecfg}, $state->{vmid}, 1);
                    }
 
@@ -6299,4 +6322,53 @@ __PACKAGE__->register_method({
        return { socket => $socket };
     }});
 
+__PACKAGE__->register_method({
+    name => 'dbus_vmstate',
+    path => '{vmid}/dbus-vmstate',
+    method => 'POST',
+    proxyto => 'node',
+    description => 'Start or stop the dbus-vmstate helper for the given VM.',
+    permissions => {
+       check => ['perm', '/vms/{vmid}', [ 'VM.Migrate' ]],
+    },
+    parameters => {
+       additionalProperties => 0,
+       properties => {
+           node => get_standard_option('pve-node'),
+           vmid => get_standard_option('pve-vmid', { completion => \&PVE::QemuServer::complete_vmid }),
+           action => {
+               type => 'string',
+               enum => [qw(start stop)],
+               description => 'Action to perform on the DBus VMState helper.',
+               optional => 0,
+           },
+       },
+    },
+    returns => {
+       type => 'null',
+    },
+    code => sub {
+       my ($param) = @_;
+       my ($node, $vmid, $action) = $param->@{qw(node vmid action)};
+
+       my $nodename = PVE::INotify::nodename();
+       if ($node ne 'localhost' && $node ne $nodename) {
+           raise_param_exc({ node => "node needs to be 'localhost' or local hostname '$nodename'" });
+       }
+
+       if (!PVE::QemuServer::Helpers::vm_running_locally($vmid)) {
+           raise_param_exc({ node => "VM $vmid not running locally on node '$nodename'" });
+       }
+
+       if ($action eq 'start') {
+          syslog('info', "starting dbus-vmstate helper for VM $vmid\n");
+          PVE::QemuServer::DBusVMState::qemu_add_dbus_vmstate($vmid);
+       } elsif ($action eq 'stop') {
+          syslog('info', "stopping dbus-vmstate helper for VM $vmid\n");
+          PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+       } else {
+           die "unknown action $action\n";
+       }
+    }});
+
 1;
diff --git a/PVE/CLI/qm.pm b/PVE/CLI/qm.pm
index 3e3a4c91..32c7629c 100755
--- a/PVE/CLI/qm.pm
+++ b/PVE/CLI/qm.pm
@@ -36,6 +36,7 @@ use PVE::QemuServer::Agent qw(agent_available);
 use PVE::QemuServer::ImportDisk;
 use PVE::QemuServer::Monitor qw(mon_cmd);
 use PVE::QemuServer::QMPHelpers;
+use PVE::QemuServer::DBusVMState;
 use PVE::QemuServer;
 
 use PVE::CLIHandler;
@@ -965,6 +966,10 @@ __PACKAGE__->register_method({
                # vm was shutdown from inside the guest or crashed, doing api cleanup
                PVE::QemuServer::vm_stop_cleanup($storecfg, $vmid, $conf, 0, 0, 1);
            }
+
+           # ensure that no dbus-vmstate helper is left running in any case
+           PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+
            PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-stop');
 
            $restart = eval { PVE::QemuServer::clear_reboot_request($vmid) };
diff --git a/PVE/QemuMigrate.pm b/PVE/QemuMigrate.pm
index c2e36334..7704a38e 100644
--- a/PVE/QemuMigrate.pm
+++ b/PVE/QemuMigrate.pm
@@ -32,6 +32,7 @@ use PVE::QemuServer::Machine;
 use PVE::QemuServer::Monitor qw(mon_cmd);
 use PVE::QemuServer::Memory qw(get_current_memory);
 use PVE::QemuServer::QMPHelpers;
+use PVE::QemuServer::DBusVMState;
 use PVE::QemuServer;
 
 use PVE::AbstractMigrate;
@@ -224,6 +225,21 @@ sub prepare {
        # Do not treat a suspended VM as paused, as it might wake up
        # during migration and remain paused after migration finishes.
        $self->{vm_was_paused} = 1 if PVE::QemuServer::vm_is_paused($vmid, 0);
+
+       if ($self->{opts}->{'with-conntrack-state'}) {
+           if ($self->{opts}->{remote}) {
+               # shouldn't be reached in normal circumstances anyway, as we prevent it on
+               # an API level
+               $self->log('warn', 'conntrack state migration not supported for remote migrations, '
+                   . 'active connections might get dropped');
+               $self->{opts}->{'with-conntrack-state'} = 0;
+           } else {
+               PVE::QemuServer::DBusVMState::qemu_add_dbus_vmstate($vmid);
+           }
+       } else {
+           $self->log('warn', 'conntrack state migration not supported or enabled, '
+               . 'active connections might get dropped');
+       }
     }
 
     my ($loc_res, $mapped_res, $missing_mappings_by_node) = PVE::QemuServer::check_local_resources($conf, $running, 1);
@@ -859,6 +875,14 @@ sub phase1_cleanup {
     if (my $err =$@) {
        $self->log('err', $err);
     }
+
+    if ($self->{running} && $self->{opts}->{'with-conntrack-state'}) {
+       # if the VM is running, that means we also tried to migrate additional
+       # state via our dbus-vmstate helper
+       # only need to locally stop it, on the target the VM cleanup will
+       # handle it
+       PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+    }
 }
 
 sub phase2_start_local_cluster {
@@ -905,6 +929,10 @@ sub phase2_start_local_cluster {
        push @$cmd, '--targetstorage', ($self->{opts}->{targetstorage} // '1');
     }
 
+    if ($self->{opts}->{'with-conntrack-state'}) {
+       push @$cmd, '--with-conntrack-state';
+    }
+
     my $spice_port;
     my $input = "nbd_protocol_version: $migrate->{nbd_proto_version}\n";
 
@@ -1434,6 +1462,13 @@ sub phase2_cleanup {
        $self->log('err', $err);
     }
 
+    if ($self->{running} && $self->{opts}->{'with-conntrack-state'}) {
+       # if the VM is running, that means we also tried to migrate additional
+       # state via our dbus-vmstate helper
+       # only need to locally stop it, on the target the VM cleanup will
+       # handle it
+       PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+    }
 
     if ($self->{tunnel}) {
        eval { PVE::Tunnel::finish_tunnel($self->{tunnel});  };
@@ -1556,6 +1591,35 @@ sub phase3_cleanup {
                $self->log('info', "skipping guest fstrim, because VM is 
paused");
            }
        }
+
+       if ($self->{running} && $self->{opts}->{'with-conntrack-state'}) {
+           # if the VM is running, that means we also migrated additional
+           # state via our dbus-vmstate helper
+           $self->log('info', 'stopping migration dbus-vmstate helpers');
+
+           # first locally
+           my $num = PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+           if (defined($num)) {
+               my $plural = $num > 1 ? "entries" : "entry";
+               $self->log('info', "migrated $num conntrack state $plural");
+           }
+
+           # .. and then remote
+           my $targetnode = $self->{node};
+           eval {
+               # FIXME: introduce proper way to call API methods on another node?
+               # See also e.g. pve-network/src/PVE/API2/Network/SDN.pm, which
+               # does something similar.
+               PVE::Tools::run_command([
+                   'pvesh', 'create',
+                   "/nodes/$targetnode/qemu/$vmid/dbus-vmstate",
+                   '--action', 'stop',
+               ]);
+           };
+           if (my $err = $@) {
+               $self->log('warn', "failed to stop dbus-vmstate on $targetnode: $err\n");
+           }
+       }
     }
 
     # close tunnel on successful migration, on error phase2_cleanup closed it
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index ffd5d56b..211e02ad 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -62,6 +62,7 @@ use PVE::QemuServer::Monitor qw(mon_cmd);
 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
 use PVE::QemuServer::QMPHelpers qw(qemu_deviceadd qemu_devicedel qemu_objectadd qemu_objectdel);
 use PVE::QemuServer::USB;
+use PVE::QemuServer::DBusVMState;
 
 my $have_sdn;
 eval {
@@ -5559,6 +5560,7 @@ sub vm_start {
 #   replicated_volumes => which volids should be re-used with bitmaps for nbd migration
 #   offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
 #       contained in config
+#   with_conntrack_state => whether to start the dbus-vmstate helper for conntrack state migration
 sub vm_start_nolock {
     my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;
 
@@ -5956,6 +5958,10 @@ sub vm_start_nolock {
            }
        }
 
+        # conntrack migration is only supported for intra-cluster migrations
+       if ($migrate_opts->{with_conntrack_state} && !$migrate_opts->{remote_node}) {
+           PVE::QemuServer::DBusVMState::qemu_add_dbus_vmstate($vmid);
+       }
     } else {
        mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
            if !$statefile && $conf->{balloon};
diff --git a/PVE/QemuServer/DBusVMState.pm b/PVE/QemuServer/DBusVMState.pm
new file mode 100644
index 00000000..b2e14b7f
--- /dev/null
+++ b/PVE/QemuServer/DBusVMState.pm
@@ -0,0 +1,124 @@
+package PVE::QemuServer::DBusVMState;
+
+use strict;
+use warnings;
+
+use PVE::SafeSyslog;
+use PVE::Systemd;
+use PVE::Tools;
+
+use constant {
+    DBUS_VMSTATE_EXE => '/usr/libexec/qemu-server/dbus-vmstate',
+};
+
+# Retrieves a property of an object from a specific interface.
+# In contrast to accessing the property directly by using $obj->Property, this
+# actually respects the owner of the object and thus can be used for interfaces
+# which might have multiple (queued) owners on the D-Bus.
+my sub dbus_get_property {
+    my ($obj, $interface, $name) = @_;
+
+    my $con = $obj->{service}->get_bus()->get_connection();
+
+    my $call = $con->make_method_call_message(
+        $obj->{service}->get_service_name(),
+        $obj->{object_path},
+        'org.freedesktop.DBus.Properties',
+        'Get',
+    );
+
+    $call->set_destination($obj->get_service()->get_owner_name());
+    $call->append_args_list($interface, $name);
+
+    my @reply = $con->send_with_reply_and_block($call, 10 * 1000)->get_args_list();
+    return $reply[0];
+}
+
+# Starts the dbus-vmstate helper D-Bus service daemon and adds the needed
+# object to the appropriate QEMU instance for the specified VM.
+sub qemu_add_dbus_vmstate {
+    my ($vmid) = @_;
+
+    if (!PVE::QemuServer::Helpers::vm_running_locally($vmid)) {
+        die "VM $vmid must be running locally\n";
+    }
+
+    # In case some leftover, previous instance is running, stop it. Otherwise
+    # we run into errors, as a systemd scope is unique.
+    if (defined(qemu_del_dbus_vmstate($vmid, quiet => 1))) {
+        warn "stopped previously running dbus-vmstate helper for VM $vmid\n";
+    }
+
+    # This also ensures that only ever one instance can run
+    PVE::Systemd::enter_systemd_scope(
+        "pve-dbus-vmstate-$vmid",
+        "Proxmox VE dbus-vmstate helper (VM $vmid)",
+    );
+
+    PVE::Tools::run_fork_detached(sub {
+        exec {DBUS_VMSTATE_EXE} DBUS_VMSTATE_EXE, $vmid;
+        die "exec failed: $!\n";
+    });
+}
+
+# Stops the dbus-vmstate helper D-Bus service daemon and removes the associated
+# object from QEMU for the specified VM.
+#
+# Returns the number of migrated conntrack entries, or undef in case of error.
+sub qemu_del_dbus_vmstate {
+    my ($vmid, %params) = @_;
+
+    my $num_entries = undef;
+    my $dbus = Net::DBus->system();
+    my $dbus_obj = $dbus->get_bus_object();
+
+    my $owners = eval { $dbus_obj->ListQueuedOwners('org.qemu.VMState1') };
+    if (my $err = $@) {
+        syslog('warn', "failed to retrieve org.qemu.VMState1 owners: $err\n")
+            if !$params{quiet};
+        return undef;
+    }
+
+    # Iterate through all name owners for 'org.qemu.VMState1' and compare
+    # the ID. If we found the corresponding one for $vmid, call our `Quit` method.
+    # Any D-Bus interaction might die/croak, so try to be careful here and swallow
+    # any hard errors.
+    foreach my $owner (@$owners) {
+        my $service = eval { Net::DBus::RemoteService->new($dbus, $owner, 'org.qemu.VMState1') };
+        if (my $err = $@) {
+            syslog('warn', "failed to get org.qemu.VMState1 service from D-Bus $owner: $err\n")
+                if !$params{quiet};
+            next;
+        }
+
+        my $object = eval { $service->get_object('/org/qemu/VMState1') };
+        if (my $err = $@) {
+            syslog('warn', "failed to get /org/qemu/VMState1 object from D-Bus $owner: $err\n")
+                if !$params{quiet};
+            next;
+        }
+
+        my $id = eval { dbus_get_property($object, 'org.qemu.VMState1', 'Id') };
+        if (defined($id) && $id eq "pve-vmstate-$vmid") {
+            my $helperobj = eval { $service->get_object('/org/qemu/VMState1', 'com.proxmox.VMStateHelper') };
+            if (my $err = $@) {
+                syslog('warn', "found dbus-vmstate helper, but does not implement com.proxmox.VMStateHelper? ($err)\n")
+                    if !$params{quiet};
+                last;
+            }
+
+            $num_entries = eval { dbus_get_property($object, 'com.proxmox.VMStateHelper', 'NumMigratedEntries') };
+            eval { $object->Quit() };
+            if (my $err = $@) {
+                syslog('warn', "failed to call quit on dbus-vmstate for VM $vmid: $err\n")
+                    if !$params{quiet};
+            }
+
+            last;
+        }
+    }
+
+    return $num_entries;
+}
+
+1;
diff --git a/PVE/QemuServer/Makefile b/PVE/QemuServer/Makefile
index 18fd13ea..8226bd2f 100644
--- a/PVE/QemuServer/Makefile
+++ b/PVE/QemuServer/Makefile
@@ -3,6 +3,7 @@ SOURCES=PCI.pm          \
        Memory.pm       \
        ImportDisk.pm   \
        Cloudinit.pm    \
+       DBusVMState.pm  \
        Agent.pm        \
        Helpers.pm      \
        Monitor.pm      \
-- 
2.48.1


