With blockdev-mirror, it is possible to change the aio setting on the
fly and this is useful for migrations between storages where one wants
to use io_uring by default and the other doesn't.

The node below the top throttle node needs to be replaced so that the
limits stay intact and that the top node still has the drive ID as the
node name. That node is not necessarily a format node. For example, it
could also be a zeroinit node from an earlier mirror operation. So
query QEMU itself.

QEMU automatically drops nodes after mirror only if they were
implicitly added, i.e. not explicitly added via blockdev-add. Since a
previous mirror target is explicitly added (and not just implicitly as
the child of a top throttle node), it is necessary to detach the
appropriate block node after mirror.

Already mock blockdev_mirror in the tests.

Co-developed-by: Alexandre Derumier <alexandre.derum...@groupe-cyllene.com>
Signed-off-by: Fiona Ebner <f.eb...@proxmox.com>
---

Changes in v2:
* Use assert parameter for get_node_name_below_throttle().
* Use attach() helper with the new 'no-throttle' option.
* More compact POD.

 src/PVE/QemuServer/BlockJob.pm            | 141 ++++++++++++++++++++++
 src/test/MigrationTest/QemuMigrateMock.pm |   8 ++
 2 files changed, 149 insertions(+)

diff --git a/src/PVE/QemuServer/BlockJob.pm b/src/PVE/QemuServer/BlockJob.pm
index 68d0431f..9131780e 100644
--- a/src/PVE/QemuServer/BlockJob.pm
+++ b/src/PVE/QemuServer/BlockJob.pm
@@ -4,12 +4,14 @@ use strict;
 use warnings;
 
 use JSON;
+use Storable qw(dclone);
 
 use PVE::Format qw(render_duration render_bytes);
 use PVE::RESTEnvironment qw(log_warn);
 use PVE::Storage;
 
 use PVE::QemuServer::Agent qw(qga_check_running);
+use PVE::QemuServer::Blockdev;
 use PVE::QemuServer::Drive qw(checked_volume_format);
 use PVE::QemuServer::Monitor qw(mon_cmd);
 use PVE::QemuServer::RunState;
@@ -187,10 +189,17 @@ sub qemu_drive_mirror_monitor {
                         print "$job_id: Completing block job...\n";
 
                         my $completion_command;
+                        # For blockdev, need to detach appropriate node. QEMU 
will only drop it if
+                        # it was implicitly added (e.g. as the child of a top 
throttle node), but
+                        # not if it was explicitly added via blockdev-add 
(e.g. as a previous mirror
+                        # target).
+                        my $detach_node_name;
                         if ($completion eq 'complete') {
                             $completion_command = 'block-job-complete';
+                            $detach_node_name = 
$jobs->{$job_id}->{'source-node-name'};
                         } elsif ($completion eq 'cancel') {
                             $completion_command = 'block-job-cancel';
+                            $detach_node_name = 
$jobs->{$job_id}->{'target-node-name'};
                         } else {
                             die "invalid completion value: $completion\n";
                         }
@@ -202,6 +211,9 @@ sub qemu_drive_mirror_monitor {
                         } elsif ($err) {
                             die "$job_id: block job cannot be completed - 
$err\n";
                         } else {
+                            $jobs->{$job_id}->{'detach-node-name'} = 
$detach_node_name
+                                if $detach_node_name;
+
                             print "$job_id: Completed successfully.\n";
                             $jobs->{$job_id}->{complete} = 1;
                         }
@@ -347,6 +359,135 @@ sub qemu_drive_mirror_switch_to_active_mode {
     }
 }
 
+=pod
+
+=head3 blockdev_mirror
+
+    blockdev_mirror($source, $dest, $jobs, $completion, $options)
+
+Mirrors the volume of a running VM specified by C<$source> to destination 
C<$dest>.
+
+=over
+
+=item C<$source>: The source information consists of:
+
+=over
+
+=item C<< $source->{vmid} >>: The ID of the running VM the source volume 
belongs to.
+
+=item C<< $source->{drive} >>: The drive configuration of the source volume as 
currently attached to
+the VM.
+
+=item C<< $source->{bitmap} >>: (optional) Use incremental mirroring based on 
the specified bitmap.
+
+=back
+
+=item C<$dest>: The destination information consists of:
+
+=over
+
+=item C<< $dest->{volid} >>: The volume ID of the target volume.
+
+=item C<< $dest->{vmid} >>: (optional) The ID of the VM the target volume 
belongs to. Defaults to
+C<< $source->{vmid} >>.
+
+=item C<< $dest->{'zero-initialized'} >>: (optional) True, if the target 
volume is zero-initialized.
+
+=back
+
+=item C<$jobs>: (optional) Other jobs in the transaction when multiple volumes 
should be mirrored.
+All jobs must be ready before completion can happen.
+
+=item C<$completion>: Completion mode, default is C<complete>:
+
+=over
+
+=item C<complete>: Wait until all jobs are ready, block-job-complete them 
(default). This means
+switching the original drive to use the new target.
+
+=item C<cancel>: Wait until all jobs are ready, block-job-cancel them. This 
means not switching the
+original drive to use the new target.
+
+=item C<skip>: Wait until all jobs are ready, return with block jobs in ready 
state.
+
+=item C<auto>: Wait until all jobs disappear, only use for jobs which complete 
automatically.
+
+=back
+
+=item C<$options>: Further options:
+
+=over
+
+=item C<< $options->{'guest-agent'} >>: If the guest agent is configured for 
the VM. It will be used
+to freeze and thaw the filesystems for consistency when the target belongs to 
a different VM.
+
+=item C<< $options->{'bwlimit'} >>: The bandwidth limit to use for the 
mirroring operation, in
+KiB/s.
+
+=back
+
+=back
+
+=cut
+
+sub blockdev_mirror {
+    my ($source, $dest, $jobs, $completion, $options) = @_;
+
+    my $vmid = $source->{vmid};
+
+    my $drive_id = PVE::QemuServer::Drive::get_drive_id($source->{drive});
+    my $device_id = "drive-$drive_id";
+
+    my $storecfg = PVE::Storage::config();
+
+    # Need to replace the node below the top node. This is not necessarily a format node, for
+    # example, it can also be a zeroinit node by a previous mirror! So query QEMU itself.
+    my $source_node_name =
+        PVE::QemuServer::Blockdev::get_node_name_below_throttle($vmid, $device_id, 1);
+
+    # Deep-copy the original drive config (aio, cache, discard, ...) so the caller's is untouched:
+    my $dest_drive = dclone($source->{drive});
+    delete($dest_drive->{format}); # cannot use the source's format
+    $dest_drive->{file} = $dest->{volid};
+
+    # Mirror happens below the throttle filter, so if the target is for the same VM, it will end up
+    # below the source's throttle filter, which is inserted for the drive device.
+    my $attach_dest_opts = { 'no-throttle' => 1 };
+    $attach_dest_opts->{'zero-initialized'} = 1 if $dest->{'zero-initialized'};
+
+    # Note that if 'aio' is not explicitly set, i.e. default, it can change if source and target
+    # don't both allow or both not allow 'io_uring' as the default.
+    my $target_node_name =
+        PVE::QemuServer::Blockdev::attach($storecfg, $vmid, $dest_drive, $attach_dest_opts);
+
+    $jobs = {} if !$jobs;
+    my $jobid = "mirror-$drive_id";
+    $jobs->{$jobid} = { # registered before starting the job, so the cleanup below covers it too
+        'source-node-name' => $source_node_name,
+        'target-node-name' => $target_node_name,
+    };
+
+    my $qmp_opts = common_mirror_qmp_options(
+        $device_id, $target_node_name, $source->{bitmap}, $options->{bwlimit},
+    );
+
+    $qmp_opts->{'job-id'} = "$jobid";
+    $qmp_opts->{replaces} = "$source_node_name";
+
+    # if a job already runs for this device we get an error, catch it for cleanup
+    eval { mon_cmd($vmid, "blockdev-mirror", $qmp_opts->%*); };
+    if (my $err = $@) { # keep the original error, the cleanup evals below overwrite $@
+        eval { qemu_blockjobs_cancel($vmid, $jobs) };
+        log_warn("unable to cancel block jobs - $@") if $@; # only warn if cleanup itself failed
+        eval { PVE::QemuServer::Blockdev::detach($vmid, $target_node_name); };
+        log_warn("unable to delete blockdev '$target_node_name' - $@") if $@;
+        die "error starting blockdev mirror - $err";
+    }
+    qemu_drive_mirror_monitor(
+        $vmid, $dest->{vmid}, $jobs, $completion, $options->{'guest-agent'}, 'mirror',
+    );
+}
+
 sub mirror {
     my ($source, $dest, $jobs, $completion, $options) = @_;
 
diff --git a/src/test/MigrationTest/QemuMigrateMock.pm 
b/src/test/MigrationTest/QemuMigrateMock.pm
index 25a4f9b2..c52df84b 100644
--- a/src/test/MigrationTest/QemuMigrateMock.pm
+++ b/src/test/MigrationTest/QemuMigrateMock.pm
@@ -9,6 +9,7 @@ use Test::MockModule;
 use MigrationTest::Shared;
 
 use PVE::API2::Qemu;
+use PVE::QemuServer::Drive;
 use PVE::Storage;
 use PVE::Tools qw(file_set_contents file_get_contents);
 
@@ -167,6 +168,13 @@ $qemu_server_blockjob_module->mock(
 
         common_mirror_mock($vmid, $drive_id);
     },
+    blockdev_mirror => sub {
+        my ($source, $dest, $jobs, $completion, $options) = @_;
+
+        my $drive_id = PVE::QemuServer::Drive::get_drive_id($source->{drive});
+
+        common_mirror_mock($source->{vmid}, $drive_id);
+    },
     qemu_drive_mirror_monitor => sub {
         my ($vmid, $vmiddst, $jobs, $completion, $qga) = @_;
 
-- 
2.47.2



_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

Reply via email to