--- Begin Message ---
fixme:
 - add test for internal (was missing) && external qemu snapshots
 - is it possible to use blockjob transactions for commit && stream
   for atomic disk commit ?

Signed-off-by: Alexandre Derumier <alexandre.derum...@groupe-cyllene.com>
---
 PVE/QemuConfig.pm          |   4 +-
 PVE/QemuServer.pm          |  79 ++++++++++-----
 PVE/QemuServer/Blockdev.pm | 199 ++++++++++++++++++++++++++++++++++---
 test/snapshot-test.pm      |   4 +-
 4 files changed, 245 insertions(+), 41 deletions(-)

diff --git a/PVE/QemuConfig.pm b/PVE/QemuConfig.pm
index 2609542c..785c84a2 100644
--- a/PVE/QemuConfig.pm
+++ b/PVE/QemuConfig.pm
@@ -378,7 +378,7 @@ sub __snapshot_create_vol_snapshot {
 
     print "snapshotting '$device' ($drive->{file})\n";
 
-    PVE::QemuServer::qemu_volume_snapshot($vmid, $device, $storecfg, $volid, 
$snapname);
+    PVE::QemuServer::qemu_volume_snapshot($vmid, $device, $storecfg, $drive, 
$snapname);
 }
 
 sub __snapshot_delete_remove_drive {
@@ -415,7 +415,7 @@ sub __snapshot_delete_vol_snapshot {
     my $storecfg = PVE::Storage::config();
     my $volid = $drive->{file};
 
-    PVE::QemuServer::qemu_volume_snapshot_delete($vmid, $storecfg, $volid, 
$snapname);
+    PVE::QemuServer::qemu_volume_snapshot_delete($vmid, $storecfg, $drive, 
$snapname);
 
     push @$unused, $volid;
 }
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 53135894..9857c536 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -52,7 +52,7 @@ use PVE::QMPClient;
 use PVE::QemuConfig;
 use PVE::QemuConfig::NoWrite;
 use PVE::QemuServer::Helpers qw(config_aware_timeout min_version 
kvm_user_version windows_version);
-use PVE::QemuServer::Blockdev qw(generate_drive_blockdev 
generate_throttle_group);
+use PVE::QemuServer::Blockdev qw(generate_drive_blockdev 
generate_throttle_group blockdev_external_snapshot blockdev_delete 
blockdev_rename blockdev_commit blockdev_stream);
 use PVE::QemuServer::Cloudinit;
 use PVE::QemuServer::CGroup;
 use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options 
get_cpu_bitness is_native_arch get_amd_sev_object get_amd_sev_type);
@@ -4677,20 +4677,37 @@ sub qemu_block_resize {
 }
 
 sub qemu_volume_snapshot {
-    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;
+    my ($vmid, $deviceid, $storecfg, $drive, $snap) = @_;
 
+    my $volid = $drive->{file};
     my $running = check_running($vmid);
 
-    if ($running && do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
+    my $do_snapshots_type = do_snapshots_type($storecfg, $volid, $deviceid, 
$running);
+
+    if ($do_snapshots_type eq 'internal') {
+       print "internal qemu snapshot\n";
        mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, 
name => $snap);
-    } else {
+    } elsif ($do_snapshots_type eq 'external') {
+       my $storeid = (PVE::Storage::parse_volume_id($volid))[0];
+       my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
+       print "external qemu snapshot\n";
+       my $snapshots = PVE::Storage::volume_snapshot_info($storecfg, $volid);
+       my $parent_snap = $snapshots->{'current'}->{parent};
+       blockdev_rename($storecfg, $vmid, $deviceid, $drive, 'current', $snap, 
$parent_snap);
+       eval { blockdev_external_snapshot($storecfg, $vmid, $deviceid, $drive, 
$snap) };
+       if ($@) {
+           print "error creating snapshot. Revert rename\n";
+           eval { blockdev_rename($storecfg, $vmid, $deviceid, $drive, $snap, 
'current', $parent_snap) };
+       }
+    } elsif ($do_snapshots_type eq 'storage') {
        PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
     }
 }
 
 sub qemu_volume_snapshot_delete {
-    my ($vmid, $storecfg, $volid, $snap) = @_;
+    my ($vmid, $storecfg, $drive, $snap) = @_;
 
+    my $volid = $drive->{file};
     my $running = check_running($vmid);
     my $attached_deviceid;
 
@@ -4702,14 +4719,35 @@ sub qemu_volume_snapshot_delete {
        });
     }
 
-    if ($attached_deviceid && do_snapshots_with_qemu($storecfg, $volid, 
$attached_deviceid)) {
+    my $do_snapshots_type = do_snapshots_type($storecfg, $volid, 
$attached_deviceid, $running);
+
+    if ($do_snapshots_type eq 'internal') {
        mon_cmd(
            $vmid,
            'blockdev-snapshot-delete-internal-sync',
            device => $attached_deviceid,
            name => $snap,
        );
-    } else {
+    } elsif ($do_snapshots_type eq 'external') {
+       print "delete qemu external snapshot\n";
+
+       my $path = PVE::Storage::path($storecfg, $volid);
+       my $snapshots = PVE::Storage::volume_snapshot_info($storecfg, $volid);
+       my $parentsnap = $snapshots->{$snap}->{parent};
+       my $childsnap = $snapshots->{$snap}->{child};
+
+       # if we delete the first snapshot, we commit because the first snapshot 
is the original base image, so it should be big.
+       # improve-me: if firstsnap > child : commit, if firstsnap < child do a 
stream.
+       if(!$parentsnap) {
+           print"delete first snapshot $snap\n";
+           blockdev_commit($storecfg, $vmid, $attached_deviceid, $drive, 
$childsnap, $snap);
+           blockdev_rename($storecfg, $vmid, $attached_deviceid, $drive, 
$snap, $childsnap, $snapshots->{$childsnap}->{child});
+       } else {
+           #intermediate snapshot, we always stream the snapshot to child 
snapshot
+           print"stream intermediate snapshot $snap to $childsnap\n";
+           blockdev_stream($storecfg, $vmid, $attached_deviceid, $drive, 
$snap, $parentsnap, $childsnap);
+       }
+    } elsif ($do_snapshots_type eq 'storage') {
        PVE::Storage::volume_snapshot_delete(
            $storecfg, $volid, $snap, $attached_deviceid ? 1 : undef);
     }
@@ -7976,27 +8014,20 @@ sub foreach_storage_used_by_vm {
     }
 }
 
-my $qemu_snap_storage = {
-    rbd => 1,
-};
-sub do_snapshots_with_qemu {
-    my ($storecfg, $volid, $deviceid) = @_;
+sub do_snapshots_type {
+    my ($storecfg, $volid, $deviceid, $running) = @_;
 
-    return if $deviceid =~ m/tpmstate0/;
+    #we skip snapshot for tpmstate
+    return if $deviceid && $deviceid =~ m/tpmstate0/;
 
-    my $storage_name = PVE::Storage::parse_volume_id($volid);
-    my $scfg = $storecfg->{ids}->{$storage_name};
-    die "could not find storage '$storage_name'\n" if !defined($scfg);
+    #we use storage snapshot if vm is not running or if disk is unused;
+    return 'storage' if !$running || !$deviceid;
 
-    if ($qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd}){
-       return 1;
-    }
+    my $qemu_snapshot_type = 
PVE::Storage::volume_support_qemu_snapshot($storecfg, $volid);
+    # if running, but don't support qemu snapshot, we use storage snapshot
+    return 'storage' if !$qemu_snapshot_type;
 
-    if ($volid =~ m/\.(qcow2|qed)$/){
-       return 1;
-    }
-
-    return;
+    return $qemu_snapshot_type;
 }
 
 sub qga_check_running {
diff --git a/PVE/QemuServer/Blockdev.pm b/PVE/QemuServer/Blockdev.pm
index 692336c1..3a199c9e 100644
--- a/PVE/QemuServer/Blockdev.pm
+++ b/PVE/QemuServer/Blockdev.pm
@@ -3,6 +3,8 @@ package PVE::QemuServer::Blockdev;
 use strict;
 use warnings;
 
+use PVE::QemuServer;
+use PVE::QemuServer::Monitor qw(mon_cmd);
 use PVE::QemuServer::Drive qw(checked_volume_format drive_is_cdrom 
drive_uses_cache_direct get_drive_id get_iso_path 
storage_allows_io_uring_default);
 use PVE::Storage;
 
@@ -11,6 +13,11 @@ use base qw(Exporter);
 our @EXPORT_OK = qw(
 generate_drive_blockdev
 generate_throttle_group
+blockdev_external_snapshot
+blockdev_delete
+blockdev_rename
+blockdev_commit
+blockdev_stream
 );
 
 sub encode_base62 {
@@ -134,6 +141,8 @@ sub print_drive_throttle_group {
 sub generate_file_blockdev {
     my ($storecfg, $drive, $snap, $nodename) = @_;
 
+    $snap = undef if $snap && $snap eq 'current';
+
     my $volid = $drive->{file};
     my $drive_id = get_drive_id($drive);
     my $blockdev = {};
@@ -200,6 +209,8 @@ sub generate_file_blockdev {
 sub generate_format_blockdev {
     my ($storecfg, $drive, $file, $snap, $size, $nodename) = @_;
 
+    $snap = undef if $snap && $snap eq 'current';
+
     my $volid = $drive->{file};
     #nbd don't support format blockdev, return the fileblockdev
     return $file if $volid =~ /^nbd:/;
@@ -273,6 +284,15 @@ my sub generate_backing_chain_blockdev {
     return generate_backing_blockdev($storecfg, $snapshots, $deviceid, $drive, 
$parentid);
 }
 
+sub generate_blockdev_throttle {
+    my ($drive, $blockdev_file) = @_;
+
+    my $drive_id = get_drive_id($drive);
+    #this is the topfilter entry point, use $drive-drive_id as nodename
+    my $blockdev_throttle = { driver => "throttle", 'node-name' => 
"drive-$drive_id", 'throttle-group' => "throttle-drive-$drive_id", 'file' => 
$blockdev_file };
+    return $blockdev_throttle;
+}
+
 sub generate_drive_blockdev {
     my ($storecfg, $drive, $live_restore_name, $size) = @_;
 
@@ -297,25 +317,178 @@ sub generate_drive_blockdev {
        $blockdev_format->{backing} = $backing_chain if $backing_chain;
     }
 
-    my $blockdev_live_restore = undef;
     #pflash0 don't support throttle-filter
     return $blockdev_format if $drive_id eq 'pflash0';
 
-    if ($live_restore_name) {
-        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect 
the format\n"
-            if !$drive->{format};
+    return generate_blockdev_throttle($drive, $blockdev_format) if 
!$live_restore_name;
 
-        $blockdev_live_restore = { 'node-name' => 
"liverestore-drive-$drive_id",
-                                   backing => $live_restore_name,
-                                   'auto-remove' => 'on', format => 
"alloc-track",
-                                   file => $blockdev_format };
-    }
+    die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the 
format\n"
+        if !$drive->{format};
 
-    #this is the topfilter entry point, use $drive-drive_id as nodename
-    my $blockdev_throttle = { driver => "throttle", 'node-name' => 
"drive-$drive_id", 'throttle-group' => "throttle-drive-$drive_id" };
     #put liverestore filter between throttle && format filter
-    $blockdev_throttle->{file} = $live_restore_name ? $blockdev_live_restore : 
$blockdev_format;
-    return $blockdev_throttle,
+    my $blockdev_live_restore = { 'node-name' => "liverestore-drive-$drive_id",
+                                  backing => $live_restore_name,
+                                  'auto-remove' => 'on', format => 
"alloc-track",
+                                  file => $blockdev_format };
+
+    return generate_blockdev_throttle($drive, $blockdev_live_restore);
+}
+
+sub blockdev_external_snapshot {
+    my ($storecfg, $vmid, $deviceid, $drive, $snap, $size) = @_;
+
+    my $volid = $drive->{file};
+
+    #preallocate add a new current file with reference to backing-file
+    PVE::Storage::volume_snapshot($storecfg, $volid, $snap, 1);
+
+    #be sure to add drive in write mode
+    delete($drive->{ro});
+
+    my $new_file_blockdev = generate_file_blockdev($storecfg, $drive);
+    my $new_fmt_blockdev = generate_format_blockdev($storecfg, $drive, 
$new_file_blockdev);
+
+    my $snap_file_blockdev = generate_file_blockdev($storecfg, $drive, $snap);
+    my $snap_fmt_blockdev = generate_format_blockdev($storecfg, $drive, 
$snap_file_blockdev, $snap);
+
+    #backing needs to be forced to undef in blockdev, to avoid reopening the 
backing-file on blockdev-add
+    $new_fmt_blockdev->{backing} = undef;
+
+    mon_cmd($vmid, 'blockdev-add', %$new_fmt_blockdev);
+
+    mon_cmd($vmid, 'blockdev-snapshot', node => 
$snap_fmt_blockdev->{'node-name'}, overlay => $new_fmt_blockdev->{'node-name'});
+}
+
+sub blockdev_delete {
+    my ($storecfg, $vmid, $drive, $file_blockdev, $fmt_blockdev, $snap) = @_;
+
+    #add eval as reopen removes the old nodename automatically only 
if it was created at vm start via command line argument
+    eval { mon_cmd($vmid, 'blockdev-del', 'node-name' => 
$file_blockdev->{'node-name'}) };
+    eval { mon_cmd($vmid, 'blockdev-del', 'node-name' => 
$fmt_blockdev->{'node-name'}) };
+
+    #delete the file (don't use vdisk_free as we don't want to delete all 
snapshot chain)
+    print"delete old $file_blockdev->{filename}\n";
+
+    my $storage_name = PVE::Storage::parse_volume_id($drive->{file});
+
+    my $volid = $drive->{file};
+    PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, 1);
 }
 
+sub blockdev_rename {
+    my ($storecfg, $vmid, $deviceid, $drive, $src_snap, $target_snap, 
$parent_snap) = @_;
+
+    print "rename $src_snap to $target_snap\n";
+
+    my $volid = $drive->{file};
+
+    my $src_file_blockdev = generate_file_blockdev($storecfg, $drive, 
$src_snap);
+    my $src_fmt_blockdev = generate_format_blockdev($storecfg, $drive, 
$src_file_blockdev, $src_snap);
+
+    #rename volume image
+    PVE::Storage::rename_volume($storecfg, $volid, $vmid, undef, $src_snap, 
$target_snap);
+
+    my $target_file_blockdev = generate_file_blockdev($storecfg, $drive, 
$target_snap);
+    my $target_fmt_blockdev = generate_format_blockdev($storecfg, $drive, 
$target_file_blockdev, $target_snap);
+
+    if($target_snap eq 'current' || $src_snap eq 'current') {
+       #rename from|to current
+
+       #add backing to target
+       if ($parent_snap) {
+           my $parent_fmt_nodename = encode_nodename('fmt', $volid, 
$parent_snap);
+           $target_fmt_blockdev->{backing} = $parent_fmt_nodename;
+       }
+       mon_cmd($vmid, 'blockdev-add', %$target_fmt_blockdev);
+
+       #reopen the current throttlefilter nodename with the target fmt nodename
+       my $throttle_blockdev = generate_blockdev_throttle($drive, 
$target_fmt_blockdev->{'node-name'});
+       mon_cmd($vmid, 'blockdev-reopen', options => [$throttle_blockdev]);
+    } else {
+        rename($src_file_blockdev->{filename}, 
$target_file_blockdev->{filename});
+
+       #intermediate snapshot
+       mon_cmd($vmid, 'blockdev-add', %$target_fmt_blockdev);
+
+       #reopen the parent node with the new target fmt backing node
+       my $parent_file_blockdev = generate_file_blockdev($storecfg, $drive, 
$parent_snap);
+       my $parent_fmt_blockdev = generate_format_blockdev($storecfg, $drive, 
$parent_file_blockdev, $parent_snap);
+       $parent_fmt_blockdev->{backing} = $target_fmt_blockdev->{'node-name'};
+       mon_cmd($vmid, 'blockdev-reopen', options => [$parent_fmt_blockdev]);
+
+       #change backing-file in qcow2 metadata
+       mon_cmd($vmid, 'change-backing-file', device => $deviceid, 
'image-node-name' => $parent_fmt_blockdev->{'node-name'}, 'backing-file' => 
$target_file_blockdev->{filename});
+    }
+
+    # delete old file|fmt nodes
+    # add eval as reopen removes the old nodename automatically only 
if it was created at vm start via command line argument
+    eval { mon_cmd($vmid, 'blockdev-del', 'node-name' => 
$src_file_blockdev->{'node-name'})};
+    eval { mon_cmd($vmid, 'blockdev-del', 'node-name' => 
$src_fmt_blockdev->{'node-name'})};
+}
+
+sub blockdev_commit {
+    my ($storecfg, $vmid, $deviceid, $drive, $src_snap, $target_snap) = @_;
+
+    my $volid = $drive->{file};
+
+    print "block-commit $src_snap to base:$target_snap\n";
+
+    my $target_file_blockdev = generate_file_blockdev($storecfg, $drive, 
$target_snap);
+    my $target_fmt_blockdev = generate_format_blockdev($storecfg, $drive, 
$target_file_blockdev, $target_snap);
+
+    my $src_file_blockdev = generate_file_blockdev($storecfg, $drive, 
$src_snap);
+    my $src_fmt_blockdev = generate_format_blockdev($storecfg, $drive, 
$src_file_blockdev, $src_snap);
+
+    my $job_id = "commit-$deviceid";
+    my $jobs = {};
+    my $opts = { 'job-id' => $job_id, device => $deviceid };
+
+    $opts->{'base-node'} = $target_fmt_blockdev->{'node-name'};
+    $opts->{'top-node'} = $src_fmt_blockdev->{'node-name'};
+
+    mon_cmd($vmid, "block-commit", %$opts);
+    $jobs->{$job_id} = {};
+
+    # if we commit the current, the blockjob need to be in 'complete' mode
+    my $complete = $src_snap && $src_snap ne 'current' ? 'auto' : 'complete';
+
+    eval { PVE::QemuServer::qemu_drive_mirror_monitor($vmid, undef, $jobs, 
$complete, 0, 'commit'); };
+    if ($@) {
+       die "Failed to complete block commit: $@\n";
+    }
+
+    blockdev_delete($storecfg, $vmid, $drive, $src_file_blockdev, 
$src_fmt_blockdev, $src_snap);
+}
+
+sub blockdev_stream {
+    my ($storecfg, $vmid, $deviceid, $drive, $snap, $parent_snap, 
$target_snap) = @_;
+
+    my $volid = $drive->{file};
+    $target_snap = undef if $target_snap eq 'current';
+
+    my $parent_file_blockdev = generate_file_blockdev($storecfg, $drive, 
$parent_snap);
+    my $parent_fmt_blockdev = generate_format_blockdev($storecfg, $drive, 
$parent_file_blockdev, $parent_snap);
+
+    my $target_file_blockdev = generate_file_blockdev($storecfg, $drive, 
$target_snap);
+    my $target_fmt_blockdev = generate_format_blockdev($storecfg, $drive, 
$target_file_blockdev, $target_snap);
+
+    my $snap_file_blockdev = generate_file_blockdev($storecfg, $drive, $snap);
+    my $snap_fmt_blockdev = generate_format_blockdev($storecfg, $drive, 
$snap_file_blockdev, $snap);
+
+    my $job_id = "stream-$deviceid";
+    my $jobs = {};
+    my $options = { 'job-id' => $job_id, device => 
$target_fmt_blockdev->{'node-name'} };
+    $options->{'base-node'} = $parent_fmt_blockdev->{'node-name'};
+    $options->{'backing-file'} = $parent_file_blockdev->{filename};
+
+    mon_cmd($vmid, 'block-stream', %$options);
+    $jobs->{$job_id} = {};
+
+    eval { PVE::QemuServer::qemu_drive_mirror_monitor($vmid, undef, $jobs, 
'auto', 0, 'stream'); };
+    if ($@) {
+       die "Failed to complete block stream: $@\n";
+    }
+
+    blockdev_delete($storecfg, $vmid, $drive, $snap_file_blockdev, 
$snap_fmt_blockdev, $snap);
+}
 1;
diff --git a/test/snapshot-test.pm b/test/snapshot-test.pm
index f130a5a5..1f61a13f 100644
--- a/test/snapshot-test.pm
+++ b/test/snapshot-test.pm
@@ -362,8 +362,8 @@ sub qmp_cmd {
 
 # BEGIN redefine PVE::QemuServer methods
 
-sub do_snapshots_with_qemu {
-    return 0;
+sub do_snapshots_type {
+    return 'storage';
 }
 
 sub vm_start {
-- 
2.39.5



--- End Message ---
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

Reply via email to