On January 27, 2022 3:01 pm, Fabian Ebner wrote:
> using the familiar early+repeated checks pattern from other API calls.
> The only intended functional changes are with regard to locking/forking.

two questions:
- the FW config cloning happens inside the worker now, while it previously 
  happened before forking the worker (LGTM, but it might be worth calling 
  out explicitly if intentional ;))
- there are some checks at the start of the endpoint (checking 
  storage/target) which are not repeated after the fork+lock - while 
  unlikely, our view of storage.cfg could change in between (lock guest 
  config -> cfs_update). should those be moved into the check sub (or into 
  the check_storage_access_clone helper)? see the rough sketch below
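
  e.g. something roughly like this inside the new $load_and_check
  (completely untested, just to illustrate what I mean - I'm assuming the
  early checks are essentially check_node_exists + storage_check_enabled,
  that $target/$storage/$storecfg are still in scope there, and that the
  relevant modules are already loaded by Qemu.pm):

      my $load_and_check = sub {
          my $conf = PVE::QemuConfig->load_config($vmid);
          PVE::QemuConfig->check_lock($conf);

          # repeat the early storage/target checks with the current
          # (post-cfs_update) view of storage.cfg
          PVE::Cluster::check_node_exists($target) if $target;
          if (defined($storage)) {
              # storage needs to be enabled locally, and on the target
              # node for cross-node clones
              PVE::Storage::storage_check_enabled($storecfg, $storage);
              PVE::Storage::storage_check_enabled($storecfg, $storage, $target)
                  if $target;
          }

          # ... rest of the existing checks unchanged ...
      };

  that way the worker would fail cleanly if the storage got disabled or
  removed in the meantime.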

rest of the series LGTM

> 
> For a full clone of a running VM without guest agent, this also fixes
> issuing vm_{resume,suspend} calls for drive mirror completion.
> Previously, those just timed out because they could not acquire the lock:
> 
>> create full clone of drive scsi0 (rbdkvm:vm-104-disk-0)
>> Formatting '/var/lib/vz/images/105/vm-105-disk-0.raw', fmt=raw
>> size=4294967296 preallocation=off
>> drive mirror is starting for drive-scsi0
>> drive-scsi0: transferred 2.0 MiB of 4.0 GiB (0.05%) in 0s
>> drive-scsi0: transferred 635.0 MiB of 4.0 GiB (15.50%) in 1s
>> drive-scsi0: transferred 1.6 GiB of 4.0 GiB (40.50%) in 2s
>> drive-scsi0: transferred 3.6 GiB of 4.0 GiB (90.23%) in 3s
>> drive-scsi0: transferred 4.0 GiB of 4.0 GiB (100.00%) in 4s, ready
>> all 'mirror' jobs are ready
>> suspend vm
>> trying to acquire lock...
>> can't lock file '/var/lock/qemu-server/lock-104.conf' - got timeout
>> drive-scsi0: Cancelling block job
>> drive-scsi0: Done.
>> resume vm
>> trying to acquire lock...
>> can't lock file '/var/lock/qemu-server/lock-104.conf' - got timeout
> 
> Signed-off-by: Fabian Ebner <f.eb...@proxmox.com>
> ---
> 
> Best viewed with -w.
> 
>  PVE/API2/Qemu.pm | 220 ++++++++++++++++++++++++-----------------------
>  1 file changed, 112 insertions(+), 108 deletions(-)
> 
> diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
> index 6992f6f..38e08c8 100644
> --- a/PVE/API2/Qemu.pm
> +++ b/PVE/API2/Qemu.pm
> @@ -3079,9 +3079,7 @@ __PACKAGE__->register_method({
>  
>       my $running = PVE::QemuServer::check_running($vmid) || 0;
>  
> -     my $clonefn = sub {
> -         # do all tests after lock but before forking worker - if possible
> -
> +     my $load_and_check = sub {
>           my $conf = PVE::QemuConfig->load_config($vmid);
>           PVE::QemuConfig->check_lock($conf);
>  
> @@ -3091,7 +3089,7 @@ __PACKAGE__->register_method({
>           die "snapshot '$snapname' does not exist\n"
>               if $snapname && !defined( $conf->{snapshots}->{$snapname});
>  
> -         my $full = extract_param($param, 'full') // !PVE::QemuConfig->is_template($conf);
> +         my $full = $param->{full} // !PVE::QemuConfig->is_template($conf);
>  
>           die "parameter 'storage' not allowed for linked clones\n"
>               if defined($storage) && !$full;
> @@ -3156,7 +3154,13 @@ __PACKAGE__->register_method({
>               }
>           }
>  
> -            # auto generate a new uuid
> +         return ($conffile, $newconf, $oldconf, $vollist, $drives, $fullclone);
> +     };
> +
> +     my $clonefn = sub {
> +         my ($conffile, $newconf, $oldconf, $vollist, $drives, $fullclone) = $load_and_check->();
> +
> +         # auto generate a new uuid
>           my $smbios1 = PVE::QemuServer::parse_smbios1($newconf->{smbios1} || 
> '');
>           $smbios1->{uuid} = PVE::QemuServer::generate_uuid();
>           $newconf->{smbios1} = PVE::QemuServer::print_smbios1($smbios1);
> @@ -3181,105 +3185,99 @@ __PACKAGE__->register_method({
>           # FIXME use PVE::QemuConfig->create_and_lock_config and adapt code
>           PVE::Tools::file_set_contents($conffile, "# qmclone temporary 
> file\nlock: clone\n");
>  
> -         my $realcmd = sub {
> -             my $upid = shift;
> -
> -             my $newvollist = [];
> -             my $jobs = {};
> -
> -             eval {
> -                 local $SIG{INT} =
> -                     local $SIG{TERM} =
> -                     local $SIG{QUIT} =
> -                     local $SIG{HUP} = sub { die "interrupted by signal\n"; };
> -
> -                 PVE::Storage::activate_volumes($storecfg, $vollist, $snapname);
> -
> -                 my $bwlimit = extract_param($param, 'bwlimit');
> -
> -                 my $total_jobs = scalar(keys %{$drives});
> -                 my $i = 1;
> -
> -                 foreach my $opt (sort keys %$drives) {
> -                     my $drive = $drives->{$opt};
> -                     my $skipcomplete = ($total_jobs != $i); # finish after last drive
> -                     my $completion = $skipcomplete ? 'skip' : 'complete';
> -
> -                     my $src_sid = PVE::Storage::parse_volume_id($drive->{file});
> -                     my $storage_list = [ $src_sid ];
> -                     push @$storage_list, $storage if defined($storage);
> -                     my $clonelimit = PVE::Storage::get_bandwidth_limit('clone', $storage_list, $bwlimit);
> -
> -                     my $newdrive = PVE::QemuServer::clone_disk(
> -                         $storecfg,
> -                         $vmid,
> -                         $running,
> -                         $opt,
> -                         $drive,
> -                         $snapname,
> -                         $newid,
> -                         $storage,
> -                         $format,
> -                         $fullclone->{$opt},
> -                         $newvollist,
> -                         $jobs,
> -                         $completion,
> -                         $oldconf->{agent},
> -                         $clonelimit,
> -                         $oldconf
> -                     );
> -
> -                     $newconf->{$opt} = PVE::QemuServer::print_drive($newdrive);
> -
> -                     PVE::QemuConfig->write_config($newid, $newconf);
> -                     $i++;
> -                 }
> -
> -                 delete $newconf->{lock};
> -
> -                 # do not write pending changes
> -                 if (my @changes = keys %{$newconf->{pending}}) {
> -                     my $pending = join(',', @changes);
> -                     warn "found pending changes for '$pending', discarding for clone\n";
> -                     delete $newconf->{pending};
> -                 }
> -
> -                 PVE::QemuConfig->write_config($newid, $newconf);
> -
> -                    if ($target) {
> -                     # always deactivate volumes - avoid lvm LVs to be active on several nodes
> -                     PVE::Storage::deactivate_volumes($storecfg, $vollist, $snapname) if !$running;
> -                     PVE::Storage::deactivate_volumes($storecfg, $newvollist);
> -
> -                     my $newconffile = PVE::QemuConfig->config_file($newid, $target);
> -                     die "Failed to move config to node '$target' - rename failed: $!\n"
> -                         if !rename($conffile, $newconffile);
> -                 }
> -
> -                 PVE::AccessControl::add_vm_to_pool($newid, $pool) if $pool;
> -             };
> -             if (my $err = $@) {
> -                 eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
> -                 sleep 1; # some storage like rbd need to wait before release volume - really?
> -
> -                 foreach my $volid (@$newvollist) {
> -                     eval { PVE::Storage::vdisk_free($storecfg, $volid); };
> -                     warn $@ if $@;
> -                 }
> -
> -                 PVE::Firewall::remove_vmfw_conf($newid);
> -
> -                 unlink $conffile; # avoid races -> last thing before die
> -
> -                 die "clone failed: $err";
> -             }
> -
> -             return;
> -         };
> -
>           PVE::Firewall::clone_vmfw_conf($vmid, $newid);
>  
> -         return $rpcenv->fork_worker('qmclone', $vmid, $authuser, $realcmd);
> +         my $newvollist = [];
> +         my $jobs = {};
> +
> +         eval {
> +             local $SIG{INT} =
> +                 local $SIG{TERM} =
> +                 local $SIG{QUIT} =
> +                 local $SIG{HUP} = sub { die "interrupted by signal\n"; };
> +
> +             PVE::Storage::activate_volumes($storecfg, $vollist, $snapname);
> +
> +             my $bwlimit = extract_param($param, 'bwlimit');
> +
> +             my $total_jobs = scalar(keys %{$drives});
> +             my $i = 1;
> +
> +             foreach my $opt (sort keys %$drives) {
> +                 my $drive = $drives->{$opt};
> +                 my $skipcomplete = ($total_jobs != $i); # finish after last drive
> +                 my $completion = $skipcomplete ? 'skip' : 'complete';
> +
> +                 my $src_sid = PVE::Storage::parse_volume_id($drive->{file});
> +                 my $storage_list = [ $src_sid ];
> +                 push @$storage_list, $storage if defined($storage);
> +                 my $clonelimit = PVE::Storage::get_bandwidth_limit('clone', $storage_list, $bwlimit);
> +
> +                 my $newdrive = PVE::QemuServer::clone_disk(
> +                     $storecfg,
> +                     $vmid,
> +                     $running,
> +                     $opt,
> +                     $drive,
> +                     $snapname,
> +                     $newid,
> +                     $storage,
> +                     $format,
> +                     $fullclone->{$opt},
> +                     $newvollist,
> +                     $jobs,
> +                     $completion,
> +                     $oldconf->{agent},
> +                     $clonelimit,
> +                     $oldconf
> +                 );
> +
> +                 $newconf->{$opt} = PVE::QemuServer::print_drive($newdrive);
> +
> +                 PVE::QemuConfig->write_config($newid, $newconf);
> +                 $i++;
> +             }
> +
> +             delete $newconf->{lock};
> +
> +             # do not write pending changes
> +             if (my @changes = keys %{$newconf->{pending}}) {
> +                 my $pending = join(',', @changes);
> +                 warn "found pending changes for '$pending', discarding for clone\n";
> +                 delete $newconf->{pending};
> +             }
> +
> +             PVE::QemuConfig->write_config($newid, $newconf);
> +
> +             if ($target) {
> +                 # always deactivate volumes - avoid lvm LVs to be active on several nodes
> +                 PVE::Storage::deactivate_volumes($storecfg, $vollist, $snapname) if !$running;
> +                 PVE::Storage::deactivate_volumes($storecfg, $newvollist);
> +
> +                 my $newconffile = PVE::QemuConfig->config_file($newid, $target);
> +                 die "Failed to move config to node '$target' - rename failed: $!\n"
> +                     if !rename($conffile, $newconffile);
> +             }
> +
> +             PVE::AccessControl::add_vm_to_pool($newid, $pool) if $pool;
> +         };
> +         if (my $err = $@) {
> +             eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
> +             sleep 1; # some storage like rbd need to wait before release volume - really?
> +
> +             foreach my $volid (@$newvollist) {
> +                 eval { PVE::Storage::vdisk_free($storecfg, $volid); };
> +                 warn $@ if $@;
> +             }
> +
> +             PVE::Firewall::remove_vmfw_conf($newid);
> +
> +             unlink $conffile; # avoid races -> last thing before die
> +
> +             die "clone failed: $err";
> +         }
> +
> +         return;
>       };
>  
>       # Aquire exclusive lock lock for $newid
> @@ -3287,12 +3285,18 @@ __PACKAGE__->register_method({
>           return PVE::QemuConfig->lock_config_full($newid, 1, $clonefn);
>       };
>  
> -     # exclusive lock if VM is running - else shared lock is enough;
> -     if ($running) {
> -         return PVE::QemuConfig->lock_config_full($vmid, 1, $lock_target_vm);
> -     } else {
> -         return PVE::QemuConfig->lock_config_shared($vmid, 1, $lock_target_vm);
> -     }
> +     my $lock_source_vm = sub {
> +         # exclusive lock if VM is running - else shared lock is enough;
> +         if ($running) {
> +             return PVE::QemuConfig->lock_config_full($vmid, 1, $lock_target_vm);
> +         } else {
> +             return PVE::QemuConfig->lock_config_shared($vmid, 1, $lock_target_vm);
> +         }
> +     };
> +
> +     $load_and_check->(); # early checks before forking/locking
> +
> +     return $rpcenv->fork_worker('qmclone', $vmid, $authuser, $lock_source_vm);
>      }});
>  
>  __PACKAGE__->register_method({
> -- 
> 2.30.2

_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
