One downside with this part in particular: we always have to allocate full-size LVs (plus qcow2 overhead), even if most of them will end up storing just a single snapshot delta, which might be only a tiny fraction of that full size. Hopefully, if discard is working across the whole stack, this doesn't actually explode space usage on the storage side, but it does make everything a bit hard to track. On the other hand, while we could in theory extend/reduce the LVs and the qcow2 images on them when modifying the backing chain, the additional complexity is probably not worth it at the moment.
> Alexandre Derumier via pve-devel <pve-devel@lists.proxmox.com> hat am > 16.12.2024 10:12 CET geschrieben: > Signed-off-by: Alexandre Derumier <alexandre.derum...@groupe-cyllene.com> > --- > src/PVE/Storage/LVMPlugin.pm | 231 ++++++++++++++++++++++++++++++++--- > 1 file changed, 213 insertions(+), 18 deletions(-) > > diff --git a/src/PVE/Storage/LVMPlugin.pm b/src/PVE/Storage/LVMPlugin.pm > index 88fd612..1257cd3 100644 > --- a/src/PVE/Storage/LVMPlugin.pm > +++ b/src/PVE/Storage/LVMPlugin.pm > @@ -4,6 +4,7 @@ use strict; > use warnings; > > use IO::File; > +use POSIX qw/ceil/; > > use PVE::Tools qw(run_command trim); > use PVE::Storage::Plugin; > @@ -216,6 +217,7 @@ sub type { > sub plugindata { > return { > content => [ {images => 1, rootdir => 1}, { images => 1 }], > + format => [ { raw => 1, qcow2 => 1 } , 'raw' ], I wonder if we want to guard the snapshotting-related parts below with an additional "snapext" option here as well? or even the usage of qcow2 altogether? > }; > } > > @@ -291,7 +293,10 @@ sub parse_volname { > PVE::Storage::Plugin::parse_lvm_name($volname); > > if ($volname =~ m/^(vm-(\d+)-\S+)$/) { > - return ('images', $1, $2, undef, undef, undef, 'raw'); > + my $name = $1; > + my $vmid = $2; > + my $format = $volname =~ m/\.qcow2$/ ? 
'qcow2' : 'raw'; > + return ('images', $name, $vmid, undef, undef, undef, $format); > } > > die "unable to parse lvm volume name '$volname'\n"; > @@ -300,11 +305,13 @@ sub parse_volname { > sub filesystem_path { > my ($class, $scfg, $volname, $snapname) = @_; > > - die "lvm snapshot is not implemented"if defined($snapname); > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = > + $class->parse_volname($volname); > > - my ($vtype, $name, $vmid) = $class->parse_volname($volname); > + die "snapshot is working with qcow2 format only" if defined($snapname) > && $format ne 'qcow2'; > > my $vg = $scfg->{vgname}; > + $name = $class->get_snap_volname($volname, $snapname) if $snapname; > > my $path = "/dev/$vg/$name"; > > @@ -332,7 +339,9 @@ sub find_free_diskname { > > my $disk_list = [ keys %{$lvs->{$vg}} ]; > > - return PVE::Storage::Plugin::get_next_vm_diskname($disk_list, $storeid, > $vmid, undef, $scfg); > + $add_fmt_suffix = $fmt eq 'qcow2' ? 1 : undef; > + > + return PVE::Storage::Plugin::get_next_vm_diskname($disk_list, $storeid, > $vmid, $fmt, $scfg, $add_fmt_suffix); > } > > sub lvcreate { > @@ -363,7 +372,15 @@ sub lvrename { > sub alloc_image { > my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size) = @_; > > - die "unsupported format '$fmt'" if $fmt ne 'raw'; > + die "unsupported format '$fmt'" if $fmt !~ m/(raw|qcow2)/; > + > + $name = $class->alloc_new_image($storeid, $scfg, $vmid, $fmt, $name, > $size); > + $class->format_qcow2($storeid, $scfg, $name, $size) if $fmt eq 'qcow2'; > + return $name; > +} > + > +sub alloc_new_image { > + my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size) = @_; > > die "illegal name '$name' - should be 'vm-$vmid-*'\n" > if $name && $name !~ m/^vm-$vmid-/; > @@ -376,16 +393,45 @@ sub alloc_image { > > my $free = int($vgs->{$vg}->{free}); > > + > + #add extra space for qcow2 metadatas > + #without sub-allocated clusters : For 1TB storage : l2_size = disk_size > × 8 / cluster_size > + #with sub-allocated 
clusters : For 1TB storage : l2_size = disk_size × 8 > / cluster_size / 16 > + #4MB overhead for 1TB with extented l2 > clustersize=128k > + > + my $qcow2_overhead = ceil($size/1024/1024/1024) * 4096; there's "qemu-img measure", which seems like it would do exactly what we want ;) > + > + my $lvmsize = $size; > + $lvmsize += $qcow2_overhead if $fmt eq 'qcow2'; > + > die "not enough free space ($free < $size)\n" if $free < $size; > > - $name = $class->find_free_diskname($storeid, $scfg, $vmid) > + $name = $class->find_free_diskname($storeid, $scfg, $vmid, $fmt) > if !$name; > > - lvcreate($vg, $name, $size, ["pve-vm-$vmid"]); > - > + my $tags = ["pve-vm-$vmid"]; > + push @$tags, "\@pve-$name" if $fmt eq 'qcow2'; that's a creative way to avoid the need to discover and activate snapshots one by one below, but it might warrant a comment ;) > + lvcreate($vg, $name, $lvmsize, $tags); > return $name; > } > > +sub format_qcow2 { > + my ($class, $storeid, $scfg, $name, $size, $backing_file) = @_; > + > + # activate volume > + $class->activate_volume($storeid, $scfg, $name, undef, {}); > + my $path = $class->path($scfg, $name, $storeid); > + # create the qcow2 fs > + my $cmd = ['/usr/bin/qemu-img', 'create']; > + push @$cmd, '-b', $backing_file, '-F', 'qcow2' if $backing_file; > + push @$cmd, '-f', 'qcow2', $path; > + push @$cmd, "${size}K" if $size; > + my $options = "extended_l2=on,"; > + $options .= PVE::Storage::Plugin::preallocation_cmd_option($scfg, > 'qcow2'); > + push @$cmd, '-o', $options; > + run_command($cmd); > +} > + > sub free_image { > my ($class, $storeid, $scfg, $volname, $isBase) = @_; > > @@ -536,6 +582,12 @@ sub activate_volume { > > my $lvm_activate_mode = 'ey'; > > + #activate volume && all snapshots volumes by tag > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = > + $class->parse_volname($volname); > + > + $path = "\@pve-$name" if $format eq 'qcow2'; > + > my $cmd = ['/sbin/lvchange', "-a$lvm_activate_mode", $path]; > 
run_command($cmd, errmsg => "can't activate LV '$path'"); > $cmd = ['/sbin/lvchange', '--refresh', $path]; > @@ -548,6 +600,10 @@ sub deactivate_volume { > my $path = $class->path($scfg, $volname, $storeid, $snapname); > return if ! -b $path; > > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = > + $class->parse_volname($volname); > + $path = "\@pve-$name" if $format eq 'qcow2'; > + > my $cmd = ['/sbin/lvchange', '-aln', $path]; > run_command($cmd, errmsg => "can't deactivate LV '$path'"); > } > @@ -555,15 +611,27 @@ sub deactivate_volume { > sub volume_resize { > my ($class, $scfg, $storeid, $volname, $size, $running) = @_; > > - $size = ($size/1024/1024) . "M"; > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = > + $class->parse_volname($volname); > + > + my $lvmsize = $size / 1024; > + my $qcow2_overhead = ceil($size/1024/1024/1024/1024) * 4096; see above > + $lvmsize += $qcow2_overhead if $format eq 'qcow2'; > + $lvmsize = "${lvmsize}k"; > > my $path = $class->path($scfg, $volname); > - my $cmd = ['/sbin/lvextend', '-L', $size, $path]; > + my $cmd = ['/sbin/lvextend', '-L', $lvmsize, $path]; > > $class->cluster_lock_storage($storeid, $scfg->{shared}, undef, sub { > run_command($cmd, errmsg => "error resizing volume '$path'"); > }); > > + if(!$running && $format eq 'qcow2') { > + my $prealloc_opt = > PVE::Storage::Plugin::preallocation_cmd_option($scfg, $format); > + my $cmd = ['/usr/bin/qemu-img', 'resize', "--$prealloc_opt", '-f', > $format, $path , $size]; > + run_command($cmd, timeout => 10); > + } > + > return 1; > } > > @@ -585,30 +653,149 @@ sub volume_size_info { > sub volume_snapshot { > my ($class, $scfg, $storeid, $volname, $snap) = @_; > > - die "lvm snapshot is not implemented"; > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = > + $class->parse_volname($volname); > + > + die "can't snapshot this image format\n" if $format ne 'qcow2'; > + > + $class->activate_volume($storeid, $scfg, 
$volname, undef, {}); > + > + my $snap_volname = $class->get_snap_volname($volname, $snap); > + my $snap_path = $class->path($scfg, $volname, $storeid, $snap); > + > + my $size = $class->volume_size_info($scfg, $storeid, $volname, 5); > + > + #rename current lvm volume to snap volume > + my $vg = $scfg->{vgname}; > + print"rename $volname to $snap_volname\n"; > + eval { lvrename($vg, $volname, $snap_volname) } ; missing error handling.. > + > + > + #allocate a new lvm volume > + $class->alloc_new_image($storeid, $scfg, $vmid, 'qcow2', $volname, > $size/1024); missing error handling > + eval { > + $class->format_qcow2($storeid, $scfg, $volname, undef, $snap_path); > + }; > + > + if ($@) { > + eval { $class->free_image($storeid, $scfg, $volname, 0) }; > + warn $@ if $@; > + } > +} > + > +sub volume_rollback_is_possible { > + my ($class, $scfg, $storeid, $volname, $snap, $blockers) = @_; > + > + my $snap_path = $class->path($scfg, $volname, $storeid, $snap); > + > + my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname); > + my $parent_snap = $snapshots->{current}->{parent}; > + > + return 1 if !-e $snap_path || $snapshots->{$parent_snap}->{file} eq > $snap_path; the first condition here seems wrong, see storage patch #1 > + die "can't rollback, '$snap' is not most recent snapshot on > '$volname'\n"; > + > + return 1; > } > > + > sub volume_snapshot_rollback { > my ($class, $scfg, $storeid, $volname, $snap) = @_; > > - die "lvm snapshot rollback is not implemented"; > + die "can't rollback snapshot this image format\n" if $volname !~ > m/\.(qcow2|qed)$/; above we only have qcow2, which IMHO makes more sense.. 
> + > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = > + $class->parse_volname($volname); > + > + $class->activate_volume($storeid, $scfg, $volname, undef, {}); > + my $size = $class->volume_size_info($scfg, $storeid, $volname, 5); > + my $snap_path = $class->path($scfg, $volname, $storeid, $snap); > + > + #simply delete the current snapshot and recreate it > + $class->free_image($storeid, $scfg, $volname, 0); > + $class->alloc_new_image($storeid, $scfg, $vmid, 'qcow2', $volname, > $size/1024); > + $class->format_qcow2($storeid, $scfg, $volname, undef, $snap_path); missing error handling.. > + > + return undef; > } > > sub volume_snapshot_delete { > - my ($class, $scfg, $storeid, $volname, $snap) = @_; > + my ($class, $scfg, $storeid, $volname, $snap, $running) = @_; > + > + die "can't delete snapshot for this image format\n" if $volname !~ > m/\.(qcow2|qed)$/; same as above > + > + return 1 if $running; > + > + my $cmd = ""; > + my $path = $class->filesystem_path($scfg, $volname); > + > + > + my $snapshots = $class->volume_snapshot_info($scfg, $storeid, $volname); > + my $snap_path = $snapshots->{$snap}->{file}; > + my $snap_volname = $snapshots->{$snap}->{volname}; > + return if !-e $snap_path; #already deleted ? should maybe be a die? 
> + > + my $parent_snap = $snapshots->{$snap}->{parent}; > + my $child_snap = $snapshots->{$snap}->{child}; > + > + my $parent_path = $snapshots->{$parent_snap}->{file} if $parent_snap; > + my $child_path = $snapshots->{$child_snap}->{file} if $child_snap; > + my $child_volname = $snapshots->{$child_snap}->{volname} if $child_snap; > + > + > + #if first snapshot, we merge child, and rename the snapshot to child > + if(!$parent_snap) { > + #we use commit here, as it's faster than rebase > + > #https://lists.gnu.org/archive/html/qemu-discuss/2019-08/msg00041.html > + print"commit $child_path\n"; > + $cmd = ['/usr/bin/qemu-img', 'commit', $child_path]; > + run_command($cmd); > + print"delete $child_volname\n"; > + $class->free_image($storeid, $scfg, $child_volname, 0); > + > + print"rename $snap_volname to $child_volname\n"; > + my $vg = $scfg->{vgname}; > + lvrename($vg, $snap_volname, $child_volname); missing error handling.. > + } else { > + print"commit $snap_path\n"; > + $cmd = ['/usr/bin/qemu-img', 'commit', $snap_path]; leftover? > + #if we delete an intermediate snapshot, we need to link upper > snapshot to base snapshot > + die "missing parentsnap snapshot to rebase child $child_path\n" if > !$parent_path; > + print "link $child_snap to $parent_snap\n"; > + $cmd = ['/usr/bin/qemu-img', 'rebase', '-u', '-b', $parent_path, > '-F', 'qcow2', '-f', 'qcow2', $child_path]; > + run_command($cmd); same as for patch #1, I am not sure the -u here is correct.. 
> + #delete the snapshot > + $class->free_image($storeid, $scfg, $snap_volname, 0); > + } > > - die "lvm snapshot delete is not implemented"; > } > > sub volume_has_feature { > my ($class, $scfg, $feature, $storeid, $volname, $snapname, $running) = > @_; > > my $features = { > - copy => { base => 1, current => 1}, > - rename => {current => 1}, > + copy => { > + base => { qcow2 => 1, raw => 1}, > + current => { qcow2 => 1, raw => 1}, > + snap => { qcow2 => 1 }, > + }, > + 'rename' => { > + current => { qcow2 => 1, raw => 1}, > + }, > + snapshot => { > + current => { qcow2 => 1 }, > + snap => { qcow2 => 1 }, > + }, > + template => { > + current => { qcow2 => 1, raw => 1}, > + }, > +# don't allow to clone as we can't activate the base on multiple host at > the same time > +# clone => { > +# base => { qcow2 => 1, raw => 1}, > +# }, I think activating the base would actually be okay, we just must never write to it? ;) > }; > > - my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) = > + > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = > $class->parse_volname($volname); > > my $key = undef; > @@ -617,7 +804,7 @@ sub volume_has_feature { > }else{ > $key = $isBase ? 'base' : 'current'; > } > - return 1 if $features->{$feature}->{$key}; > + return 1 if defined($features->{$feature}->{$key}->{$format}); > > return undef; > } > @@ -738,4 +925,12 @@ sub rename_volume { > return "${storeid}:${target_volname}"; > } > > +sub get_snap_volname { > + my ($class, $volname, $snapname) = @_; > + > + my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $format) = > $class->parse_volname($volname); > + $name = !$snapname || $snapname eq 'current' ? $volname : > "snap-$snapname-$name"; > + return $name; > +} > + > 1; > -- > 2.39.5 _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel