On January 27, 2022 3:01 pm, Fabian Ebner wrote:
> using the familiar early+repeated checks pattern from other API calls.
> Only intended functional changes are with regard to locking/forking.
two questions:

- the FW config cloning happens inside the worker now, while it
  previously happened before forking the worker (LGTM, but might be
  worth calling out explicitly if intentional ;))
- there are some checks at the start of the endpoint (checking
  storage/target) which are not repeated after the fork+lock - while
  unlikely, our view of storage.cfg could change in between (lock guest
  config -> cfs_update). should those be moved into the check sub (or
  into the check_storage_access_clone helper)? see the rough sketch at
  the end of this mail.

rest of the series LGTM

>
> For a full clone of a running VM without guest agent, this also fixes
> issuing vm_{resume,suspend} calls for drive mirror completion.
> Previously, those just timed out, because of not getting the lock:
>
>> create full clone of drive scsi0 (rbdkvm:vm-104-disk-0)
>> Formatting '/var/lib/vz/images/105/vm-105-disk-0.raw', fmt=raw size=4294967296 preallocation=off
>> drive mirror is starting for drive-scsi0
>> drive-scsi0: transferred 2.0 MiB of 4.0 GiB (0.05%) in 0s
>> drive-scsi0: transferred 635.0 MiB of 4.0 GiB (15.50%) in 1s
>> drive-scsi0: transferred 1.6 GiB of 4.0 GiB (40.50%) in 2s
>> drive-scsi0: transferred 3.6 GiB of 4.0 GiB (90.23%) in 3s
>> drive-scsi0: transferred 4.0 GiB of 4.0 GiB (100.00%) in 4s, ready
>> all 'mirror' jobs are ready
>> suspend vm
>> trying to acquire lock...
>> can't lock file '/var/lock/qemu-server/lock-104.conf' - got timeout
>> drive-scsi0: Cancelling block job
>> drive-scsi0: Done.
>> resume vm
>> trying to acquire lock...
>> can't lock file '/var/lock/qemu-server/lock-104.conf' - got timeout
>
> Signed-off-by: Fabian Ebner <f.eb...@proxmox.com>
> ---
>
> Best viewed with -w.
>
>  PVE/API2/Qemu.pm | 220 ++++++++++++++++++++++++-----------------------
>  1 file changed, 112 insertions(+), 108 deletions(-)
>
> diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
> index 6992f6f..38e08c8 100644
> --- a/PVE/API2/Qemu.pm
> +++ b/PVE/API2/Qemu.pm
> @@ -3079,9 +3079,7 @@ __PACKAGE__->register_method({
>
>      my $running = PVE::QemuServer::check_running($vmid) || 0;
>
> -    my $clonefn = sub {
> -        # do all tests after lock but before forking worker - if possible
> -
> +    my $load_and_check = sub {
>          my $conf = PVE::QemuConfig->load_config($vmid);
>          PVE::QemuConfig->check_lock($conf);
>
> @@ -3091,7 +3089,7 @@ __PACKAGE__->register_method({
>          die "snapshot '$snapname' does not exist\n"
>              if $snapname && !defined( $conf->{snapshots}->{$snapname});
>
> -        my $full = extract_param($param, 'full') // !PVE::QemuConfig->is_template($conf);
> +        my $full = $param->{full} // !PVE::QemuConfig->is_template($conf);
>
>          die "parameter 'storage' not allowed for linked clones\n"
>              if defined($storage) && !$full;
> @@ -3156,7 +3154,13 @@ __PACKAGE__->register_method({
>              }
>          }
>
> -        # auto generate a new uuid
> +        return ($conffile, $newconf, $oldconf, $vollist, $drives, $fullclone);
> +    };
> +
> +    my $clonefn = sub {
> +        my ($conffile, $newconf, $oldconf, $vollist, $drives, $fullclone) = $load_and_check->();
> +
> +        # auto generate a new uuid
>          my $smbios1 = PVE::QemuServer::parse_smbios1($newconf->{smbios1} || '');
>          $smbios1->{uuid} = PVE::QemuServer::generate_uuid();
>          $newconf->{smbios1} = PVE::QemuServer::print_smbios1($smbios1);
> @@ -3181,105 +3185,99 @@ __PACKAGE__->register_method({
>          # FIXME use PVE::QemuConfig->create_and_lock_config and adapt code
>          PVE::Tools::file_set_contents($conffile, "# qmclone temporary file\nlock: clone\n");
>
> -        my $realcmd = sub {
> -            my $upid = shift;
> -
> -            my $newvollist = [];
> -            my $jobs = {};
> -
> -            eval {
> -                local $SIG{INT} =
> -                    local $SIG{TERM} =
> -                    local $SIG{QUIT} =
> -                    local $SIG{HUP} = sub { die "interrupted by signal\n"; };
> -
> -                PVE::Storage::activate_volumes($storecfg, $vollist, $snapname);
> -
> -                my $bwlimit = extract_param($param, 'bwlimit');
> -
> -                my $total_jobs = scalar(keys %{$drives});
> -                my $i = 1;
> -
> -                foreach my $opt (sort keys %$drives) {
> -                    my $drive = $drives->{$opt};
> -                    my $skipcomplete = ($total_jobs != $i); # finish after last drive
> -                    my $completion = $skipcomplete ? 'skip' : 'complete';
> -
> -                    my $src_sid = PVE::Storage::parse_volume_id($drive->{file});
> -                    my $storage_list = [ $src_sid ];
> -                    push @$storage_list, $storage if defined($storage);
> -                    my $clonelimit = PVE::Storage::get_bandwidth_limit('clone', $storage_list, $bwlimit);
> -
> -                    my $newdrive = PVE::QemuServer::clone_disk(
> -                        $storecfg,
> -                        $vmid,
> -                        $running,
> -                        $opt,
> -                        $drive,
> -                        $snapname,
> -                        $newid,
> -                        $storage,
> -                        $format,
> -                        $fullclone->{$opt},
> -                        $newvollist,
> -                        $jobs,
> -                        $completion,
> -                        $oldconf->{agent},
> -                        $clonelimit,
> -                        $oldconf
> -                    );
> -
> -                    $newconf->{$opt} = PVE::QemuServer::print_drive($newdrive);
> -
> -                    PVE::QemuConfig->write_config($newid, $newconf);
> -                    $i++;
> -                }
> -
> -                delete $newconf->{lock};
> -
> -                # do not write pending changes
> -                if (my @changes = keys %{$newconf->{pending}}) {
> -                    my $pending = join(',', @changes);
> -                    warn "found pending changes for '$pending', discarding for clone\n";
> -                    delete $newconf->{pending};
> -                }
> -
> -                PVE::QemuConfig->write_config($newid, $newconf);
> -
> -                if ($target) {
> -                    # always deactivate volumes - avoid lvm LVs to be active on several nodes
> -                    PVE::Storage::deactivate_volumes($storecfg, $vollist, $snapname) if !$running;
> -                    PVE::Storage::deactivate_volumes($storecfg, $newvollist);
> -
> -                    my $newconffile = PVE::QemuConfig->config_file($newid, $target);
> -                    die "Failed to move config to node '$target' - rename failed: $!\n"
> -                        if !rename($conffile, $newconffile);
> -                }
> -
> -                PVE::AccessControl::add_vm_to_pool($newid, $pool) if $pool;
> -            };
> -            if (my $err = $@) {
> -                eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
> -                sleep 1; # some storage like rbd need to wait before release volume - really?
> -
> -                foreach my $volid (@$newvollist) {
> -                    eval { PVE::Storage::vdisk_free($storecfg, $volid); };
> -                    warn $@ if $@;
> -                }
> -
> -                PVE::Firewall::remove_vmfw_conf($newid);
> -
> -                unlink $conffile; # avoid races -> last thing before die
> -
> -                die "clone failed: $err";
> -            }
> -
> -            return;
> -        };
> -
>          PVE::Firewall::clone_vmfw_conf($vmid, $newid);
>
> -        return $rpcenv->fork_worker('qmclone', $vmid, $authuser, $realcmd);
> +        my $newvollist = [];
> +        my $jobs = {};
> +
> +        eval {
> +            local $SIG{INT} =
> +                local $SIG{TERM} =
> +                local $SIG{QUIT} =
> +                local $SIG{HUP} = sub { die "interrupted by signal\n"; };
> +
> +            PVE::Storage::activate_volumes($storecfg, $vollist, $snapname);
> +
> +            my $bwlimit = extract_param($param, 'bwlimit');
> +
> +            my $total_jobs = scalar(keys %{$drives});
> +            my $i = 1;
> +
> +            foreach my $opt (sort keys %$drives) {
> +                my $drive = $drives->{$opt};
> +                my $skipcomplete = ($total_jobs != $i); # finish after last drive
> +                my $completion = $skipcomplete ? 'skip' : 'complete';
> +
> +                my $src_sid = PVE::Storage::parse_volume_id($drive->{file});
> +                my $storage_list = [ $src_sid ];
> +                push @$storage_list, $storage if defined($storage);
> +                my $clonelimit = PVE::Storage::get_bandwidth_limit('clone', $storage_list, $bwlimit);
> +
> +                my $newdrive = PVE::QemuServer::clone_disk(
> +                    $storecfg,
> +                    $vmid,
> +                    $running,
> +                    $opt,
> +                    $drive,
> +                    $snapname,
> +                    $newid,
> +                    $storage,
> +                    $format,
> +                    $fullclone->{$opt},
> +                    $newvollist,
> +                    $jobs,
> +                    $completion,
> +                    $oldconf->{agent},
> +                    $clonelimit,
> +                    $oldconf
> +                );
> +
> +                $newconf->{$opt} = PVE::QemuServer::print_drive($newdrive);
> +
> +                PVE::QemuConfig->write_config($newid, $newconf);
> +                $i++;
> +            }
> +
> +            delete $newconf->{lock};
> +
> +            # do not write pending changes
> +            if (my @changes = keys %{$newconf->{pending}}) {
> +                my $pending = join(',', @changes);
> +                warn "found pending changes for '$pending', discarding for clone\n";
> +                delete $newconf->{pending};
> +            }
> +
> +            PVE::QemuConfig->write_config($newid, $newconf);
> +
> +            if ($target) {
> +                # always deactivate volumes - avoid lvm LVs to be active on several nodes
> +                PVE::Storage::deactivate_volumes($storecfg, $vollist, $snapname) if !$running;
> +                PVE::Storage::deactivate_volumes($storecfg, $newvollist);
> +
> +                my $newconffile = PVE::QemuConfig->config_file($newid, $target);
> +                die "Failed to move config to node '$target' - rename failed: $!\n"
> +                    if !rename($conffile, $newconffile);
> +            }
> +
> +            PVE::AccessControl::add_vm_to_pool($newid, $pool) if $pool;
> +        };
> +        if (my $err = $@) {
> +            eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
> +            sleep 1; # some storage like rbd need to wait before release volume - really?
> +
> +            foreach my $volid (@$newvollist) {
> +                eval { PVE::Storage::vdisk_free($storecfg, $volid); };
> +                warn $@ if $@;
> +            }
> +
> +            PVE::Firewall::remove_vmfw_conf($newid);
> +
> +            unlink $conffile; # avoid races -> last thing before die
> +
> +            die "clone failed: $err";
> +        }
> +
> +        return;
>      };
>
>      # Aquire exclusive lock lock for $newid
> @@ -3287,12 +3285,18 @@ __PACKAGE__->register_method({
>          return PVE::QemuConfig->lock_config_full($newid, 1, $clonefn);
>      };
>
> -    # exclusive lock if VM is running - else shared lock is enough;
> -    if ($running) {
> -        return PVE::QemuConfig->lock_config_full($vmid, 1, $lock_target_vm);
> -    } else {
> -        return PVE::QemuConfig->lock_config_shared($vmid, 1, $lock_target_vm);
> -    }
> +    my $lock_source_vm = sub {
> +        # exclusive lock if VM is running - else shared lock is enough;
> +        if ($running) {
> +            return PVE::QemuConfig->lock_config_full($vmid, 1, $lock_target_vm);
> +        } else {
> +            return PVE::QemuConfig->lock_config_shared($vmid, 1, $lock_target_vm);
> +        }
> +    };
> +
> +    $load_and_check->(); # early checks before forking/locking
> +
> +    return $rpcenv->fork_worker('qmclone', $vmid, $authuser, $lock_source_vm);
>  }});
>
>  __PACKAGE__->register_method({
> --
> 2.30.2
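
Regarding the second question, a rough sketch of what I had in mind
(untested, variable names taken from the patch context and the existing
check block reproduced from memory, so treat it as illustration only):
the storage/target checks from the start of the endpoint would simply
move into $load_and_check, so they are re-evaluated after fork + lock
(i.e. with a current view of storage.cfg):

    my $load_and_check = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf);

        # re-check storage/target now that we hold the lock
        if (defined($storage)) {
            # storage must exist and be enabled on the local node
            PVE::Storage::storage_check_enabled($storecfg, $storage);
            if ($target) {
                # and on the target node - cross-node clone needs shared storage
                PVE::Storage::storage_check_enabled($storecfg, $storage, $target);
                my $scfg = PVE::Storage::storage_config($storecfg, $storage);
                die "can't clone to non-shared storage '$storage'\n"
                    if !$scfg->{shared};
            }
        }

        # ... rest of the existing checks ...
    };

for this to actually pick up changes, $storecfg would probably also need
to be (re-)read inside the sub (PVE::Storage::config()) instead of
re-using the one from before the fork. alternatively, the block could go
into the check_storage_access_clone helper and be called from
$load_and_check - no strong preference.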