On June 27, 2025 5:57 pm, Fiona Ebner wrote: > With blockdev-mirror, it is possible to change the aio setting on the > fly and this is useful for migrations between storages where one wants > to use io_uring by default and the other doesn't. > > The node below the top throttle node needs to be replaced so that the > limits stay intact and that the top node still has the drive ID as the > node name. That node is not necessarily a format node. For example, it > could also be a zeroinit node from an earlier mirror operation. So > query QEMU itself. > > QEMU automatically drops nodes after mirror only if they were > implicitly added, i.e. not explicitly added via blockdev-add. Since a > previous mirror target is explicitly added (and not just implicitly as > the child of a top throttle node), it is necessary to detach the > appropriate block node after mirror. > > Already mock blockdev_mirror in the tests. > > Co-developed-by: Alexandre Derumier <alexandre.derum...@groupe-cyllene.com> > Signed-off-by: Fiona Ebner <f.eb...@proxmox.com> > --- > > NOTE: Changes since last series: > * Query QEMU for file child. > * Remove appropriate node after mirror. > * Delete format property from cloned drive hash for destination. 
> > src/PVE/QemuServer/BlockJob.pm | 176 ++++++++++++++++++++++ > src/test/MigrationTest/QemuMigrateMock.pm | 8 + > 2 files changed, 184 insertions(+) > > diff --git a/src/PVE/QemuServer/BlockJob.pm b/src/PVE/QemuServer/BlockJob.pm > index 68d0431f..212d6a4f 100644 > --- a/src/PVE/QemuServer/BlockJob.pm > +++ b/src/PVE/QemuServer/BlockJob.pm > @@ -4,12 +4,14 @@ use strict; > use warnings; > > use JSON; > +use Storable qw(dclone); > > use PVE::Format qw(render_duration render_bytes); > use PVE::RESTEnvironment qw(log_warn); > use PVE::Storage; > > use PVE::QemuServer::Agent qw(qga_check_running); > +use PVE::QemuServer::Blockdev; > use PVE::QemuServer::Drive qw(checked_volume_format); > use PVE::QemuServer::Monitor qw(mon_cmd); > use PVE::QemuServer::RunState; > @@ -187,10 +189,17 @@ sub qemu_drive_mirror_monitor { > print "$job_id: Completing block job...\n"; > > my $completion_command; > + # For blockdev, need to detach appropriate node. > QEMU will only drop it if > + # it was implicitly added (e.g. as the child of a > top throttle node), but > + # not if it was explicitly added via blockdev-add > (e.g. as a previous mirror > + # target). 
> + my $detach_node_name; > if ($completion eq 'complete') { > $completion_command = 'block-job-complete'; > + $detach_node_name = > $jobs->{$job_id}->{'source-node-name'}; > } elsif ($completion eq 'cancel') { > $completion_command = 'block-job-cancel'; > + $detach_node_name = > $jobs->{$job_id}->{'target-node-name'}; > } else { > die "invalid completion value: $completion\n"; > } > @@ -202,6 +211,9 @@ sub qemu_drive_mirror_monitor { > } elsif ($err) { > die "$job_id: block job cannot be completed - > $err\n"; > } else { > + $jobs->{$job_id}->{'detach-node-name'} = > $detach_node_name > + if $detach_node_name; > + > print "$job_id: Completed successfully.\n"; > $jobs->{$job_id}->{complete} = 1; > } > @@ -347,6 +359,170 @@ sub qemu_drive_mirror_switch_to_active_mode { > } > } > > +=pod > + > +=head3 blockdev_mirror > + > + blockdev_mirror($source, $dest, $jobs, $completion, $options) > + > +Mirrors the volume of a running VM specified by C<$source> to destination > C<$dest>. > + > +=over > + > +=item C<$source> > + > +The source information consists of: > + > +=over > + > +=item C<< $source->{vmid} >> > + > +The ID of the running VM the source volume belongs to. > + > +=item C<< $source->{drive} >> > + > +The drive configuration of the source volume as currently attached to the VM. > + > +=item C<< $source->{bitmap} >> > + > +(optional) Use incremental mirroring based on the specified bitmap. > + > +=back > + > +=item C<$dest> > + > +The destination information consists of: > + > +=over > + > +=item C<< $dest->{volid} >> > + > +The volume ID of the target volume. > + > +=item C<< $dest->{vmid} >> > + > +(optional) The ID of the VM the target volume belongs to. Defaults to C<< > $source->{vmid} >>. > + > +=item C<< $dest->{'zero-initialized'} >> > + > +(optional) True, if the target volume is zero-initialized. > + > +=back > + > +=item C<$jobs> > + > +(optional) Other jobs in the transaction when multiple volumes should be > mirrored. 
All jobs must be > +ready before completion can happen. > + > +=item C<$completion> > + > +Completion mode, default is C<complete>: > + > +=over > + > +=item C<complete> > + > +Wait until all jobs are ready, block-job-complete them (default). This means > switching the orignal > +drive to use the new target. > + > +=item C<cancel> > + > +Wait until all jobs are ready, block-job-cancel them. This means not > switching the original drive > +to use the new target. > + > +=item C<skip> > + > +Wait until all jobs are ready, return with block jobs in ready state. > + > +=item C<auto> > + > +Wait until all jobs disappear, only use for jobs which complete > automatically. > + > +=back > + > +=item C<$options> > + > +Further options: > + > +=over > + > +=item C<< $options->{'guest-agent'} >> > + > +If the guest agent is configured for the VM. It will be used to freeze and > thaw the filesystems for > +consistency when the target belongs to a different VM. > + > +=item C<< $options->{'bwlimit'} >> > + > +The bandwidth limit to use for the mirroring operation, in KiB/s. > + > +=back > + > +=back > + > +=cut > + > +sub blockdev_mirror { > + my ($source, $dest, $jobs, $completion, $options) = @_; > + > + my $vmid = $source->{vmid}; > + > + my $drive_id = PVE::QemuServer::Drive::get_drive_id($source->{drive}); > + my $device_id = "drive-$drive_id"; > + > + my $storecfg = PVE::Storage::config(); > + > + # Need to replace the node below the top node. This is not necessarily a > format node, for > + # example, it can also be a zeroinit node by a previous mirror! So query > QEMU itself. > + my $child_info = mon_cmd($vmid, 'block-node-query-file-child', > 'node-name' => $device_id); > + my $source_node_name = $child_info->{'node-name'};
Isn't this semantically equivalent to get_node_name_below_throttle? That one does a few more checks and is slightly more expensive, but validating that the top node is a throttle node as expected might be a good thing here as well. Depending on how we see things, we might want to add a `$assert` parameter to that helper, though, for call sites that only happen in blockdev context - to avoid the fallback in case the top node is not a throttle group, and die instead. > + > + # Copy original drive config (aio, cache, discard, ...): > + my $dest_drive = dclone($source->{drive}); > + delete($dest_drive->{format}); # cannot use the source's format > + $dest_drive->{file} = $dest->{volid}; > + > + my $generate_blockdev_opts = {}; > + $generate_blockdev_opts->{'zero-initialized'} = 1 if > $dest->{'zero-initialized'}; > + > + # Note that if 'aio' is not explicitly set, i.e. default, it can change > if source and target > + # don't both allow or both not allow 'io_uring' as the default. > + my $target_drive_blockdev = > PVE::QemuServer::Blockdev::generate_drive_blockdev( > + $storecfg, $dest_drive, $generate_blockdev_opts, > + ); > + # Top node is the throttle group, must use the file child. > + my $target_blockdev = $target_drive_blockdev->{file}; Should we have an option for generate_drive_blockdev to skip the throttle group/top node? Then we could just use Blockdev::attach here, at least if we make that return the top-level node name or blockdev.
> + > + PVE::QemuServer::Monitor::mon_cmd($vmid, 'blockdev-add', > $target_blockdev->%*); > + my $target_node_name = $target_blockdev->{'node-name'}; > + > + $jobs = {} if !$jobs; > + my $jobid = "mirror-$drive_id"; > + $jobs->{$jobid} = { > + 'source-node-name' => $source_node_name, > + 'target-node-name' => $target_node_name, > + }; > + > + my $qmp_opts = common_mirror_qmp_options( > + $device_id, $target_node_name, $source->{bitmap}, > $options->{bwlimit}, > + ); > + > + $qmp_opts->{'job-id'} = "$jobid"; > + $qmp_opts->{replaces} = "$source_node_name"; > + > + # if a job already runs for this device we get an error, catch it for > cleanup > + eval { mon_cmd($vmid, "blockdev-mirror", $qmp_opts->%*); }; > + if (my $err = $@) { > + eval { qemu_blockjobs_cancel($vmid, $jobs) }; > + log_warn("unable to cancel block jobs - $@"); > + eval { PVE::QemuServer::Blockdev::detach($vmid, $target_node_name); > }; > + log_warn("unable to delete blockdev '$target_node_name' - $@"); > + die "error starting blockdev mirrror - $err"; > + } > + qemu_drive_mirror_monitor( > + $vmid, $dest->{vmid}, $jobs, $completion, $options->{'guest-agent'}, > 'mirror', > + ); > +} > + > sub mirror { > my ($source, $dest, $jobs, $completion, $options) = @_; > > diff --git a/src/test/MigrationTest/QemuMigrateMock.pm > b/src/test/MigrationTest/QemuMigrateMock.pm > index 25a4f9b2..c52df84b 100644 > --- a/src/test/MigrationTest/QemuMigrateMock.pm > +++ b/src/test/MigrationTest/QemuMigrateMock.pm > @@ -9,6 +9,7 @@ use Test::MockModule; > use MigrationTest::Shared; > > use PVE::API2::Qemu; > +use PVE::QemuServer::Drive; > use PVE::Storage; > use PVE::Tools qw(file_set_contents file_get_contents); > > @@ -167,6 +168,13 @@ $qemu_server_blockjob_module->mock( > > common_mirror_mock($vmid, $drive_id); > }, > + blockdev_mirror => sub { > + my ($source, $dest, $jobs, $completion, $options) = @_; > + > + my $drive_id = > PVE::QemuServer::Drive::get_drive_id($source->{drive}); > + > + 
common_mirror_mock($source->{vmid}, $drive_id); > + }, > qemu_drive_mirror_monitor => sub { > my ($vmid, $vmiddst, $jobs, $completion, $qga) = @_; > > -- > 2.47.2 > > > > _______________________________________________ > pve-devel mailing list > pve-devel@lists.proxmox.com > https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel > > > _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel