On 6/15/20 2:21 PM, Thomas Lamprecht wrote:
Am 6/10/20 um 1:23 PM schrieb Fabian Ebner:
Implement it for generic storages supporting backups
(i.e. directory-based storages) and add a wrapper for PBS.

Signed-off-by: Fabian Ebner <f.eb...@proxmox.com>
---

Changes in v2:
     * Return actual volid in PBS using the new print_volid helper
     * Split out prune_mark_backup_group and move it to Storage.pm
       Like this it can be nicely reused from vzdump
     * Use // {} trick to avoid some defined-ness checks for $archive_info

in general looks OK, great! A few nits and notes inline.


  PVE/Storage.pm             | 103 +++++++++++++++-
  PVE/Storage/PBSPlugin.pm   |  50 ++++++++
  PVE/Storage/Plugin.pm      |  57 +++++++++
  test/prune_backups_test.pm | 237 +++++++++++++++++++++++++++++++++++++
  test/run_plugin_tests.pl   |   1 +
  5 files changed, 446 insertions(+), 2 deletions(-)
  create mode 100644 test/prune_backups_test.pm

diff --git a/PVE/Storage.pm b/PVE/Storage.pm
index a459572..cac67bb 100755
--- a/PVE/Storage.pm
+++ b/PVE/Storage.pm
@@ -40,11 +40,11 @@ use PVE::Storage::DRBDPlugin;
  use PVE::Storage::PBSPlugin;
# Storage API version. Icrement it on changes in storage API interface.
-use constant APIVER => 5;
+use constant APIVER => 6;
  # Age is the number of versions we're backward compatible with.
  # This is like having 'current=APIVER' and age='APIAGE' in libtool,
  # see 
https://www.gnu.org/software/libtool/manual/html_node/Libtool-versioning.html
-use constant APIAGE => 4;
+use constant APIAGE => 5;
# load standard plugins
  PVE::Storage::DirPlugin->register();
@@ -1536,6 +1536,105 @@ sub extract_vzdump_config {
      }
  }
+sub prune_backups {
+    my ($cfg, $storeid, $opts_override, $vmid, $dryrun) = @_;
+
+    my $scfg = storage_config($cfg, $storeid);
+    die "storage '$storeid' does not support backups\n" if 
!$scfg->{content}->{backup};
+
+    my $prune_opts = $opts_override;
+    if (!defined($prune_opts)) {
+       die "no prune-backups options configured for storage '$storeid'\n"
+           if !defined($scfg->{'prune-backups'});
+       $prune_opts = PVE::JSONSchema::parse_property_string('prune-backups', 
$scfg->{'prune-backups'});
+    }
+
+    my $all_zero = 1;
+    foreach my $opt (keys %{$prune_opts}) {
+       $all_zero = 0 if defined($prune_opts->{$opt}) && $prune_opts->{$opt} > 
0;
+    }

nit, you use the hash values directly:

for my $opt (values %$prune_opts) {
     $all_zero = 0 if defined($opt) && $opt > 0;
}


Ack.

+    die "at least one keep-option must be set and positive\n" if $all_zero;
+
+    my $plugin = PVE::Storage::Plugin->lookup($scfg->{type});
+    return $plugin->prune_backups($scfg, $storeid, $prune_opts, $vmid, 
$dryrun);
+}
+
+my $prune_mark = sub {
+    my ($prune_entries, $keep_count, $id_func) = @_;
+
+    return if !defined($keep_count);
+
+    my $already_included = {};
+
+    # determine time covered by previous selections
+    foreach my $prune_entry (@{$prune_entries}) {
+       my $mark = $prune_entry->{mark};
+       if (defined($mark) && $mark eq 'keep') {
+           my $id = $id_func->($prune_entry->{ctime});
+           $already_included->{$id} = 1;
+       }
+    }
+
+    my $newly_included = {};
+
+    foreach my $prune_entry (@{$prune_entries}) {
+       next if defined($prune_entry->{mark});
+
+       my $id = $id_func->($prune_entry->{ctime});
+       next if $already_included->{$id};
+
+       if (!$newly_included->{$id}) {
+           last if scalar(keys %{$newly_included}) >= $keep_count;
+           $newly_included->{$id} = 1;
+           $prune_entry->{mark} = 'keep';
+       } else {
+           $prune_entry->{mark} = 'remove';
+       }
+    }
+};
+
+sub prune_mark_backup_group {
+    my ($backup_group, $prune_opts) = @_;
+
+    my @prune_list = sort { $b->{ctime} <=> $a->{ctime} } @{$backup_group};
+
+    $prune_mark->(\@prune_list, $prune_opts->{'keep-last'}, sub {
+       my ($ctime) = @_;
+       return $ctime;
+    });
+    $prune_mark->(\@prune_list, $prune_opts->{'keep-hourly'}, sub {
+       my ($ctime) = @_;
+       my (undef, undef, $hour, $day, $month, $year) = localtime($ctime);
+       return "$hour/$day/$month/$year";
+    });
+    $prune_mark->(\@prune_list, $prune_opts->{'keep-daily'}, sub {
+       my ($ctime) = @_;
+       my (undef, undef, undef, $day, $month, $year) = localtime($ctime);
+       return "$day/$month/$year";
+    });
+    $prune_mark->(\@prune_list, $prune_opts->{'keep-weekly'}, sub {
+       my ($ctime) = @_;
+       my ($sec, $min, $hour, $day, $month, $year) = localtime($ctime);
+       my $iso_week = int(strftime("%V", $sec, $min, $hour, $day, $month - 1, 
$year - 1900));
+       my $iso_week_year = int(strftime("%G", $sec, $min, $hour, $day, $month 
- 1, $year - 1900));
+       return "$iso_week/$iso_week_year";
+    });
+    $prune_mark->(\@prune_list, $prune_opts->{'keep-monthly'}, sub {
+       my ($ctime) = @_;
+       my (undef, undef, undef, undef, $month, $year) = localtime($ctime);
+       return "$month/$year";
+    });
+    $prune_mark->(\@prune_list, $prune_opts->{'keep-yearly'}, sub {
+       my ($ctime) = @_;
+       my $year = (localtime($ctime))[5];
+       return "$year";
+    });
+
+    foreach my $prune_entry (@prune_list) {
+       $prune_entry->{mark} //= 'remove';
+    }
+}
+
  sub volume_export {
      my ($cfg, $fh, $volid, $format, $snapshot, $base_snapshot, 
$with_snapshots) = @_;
diff --git a/PVE/Storage/PBSPlugin.pm b/PVE/Storage/PBSPlugin.pm
index f029e55..576cc8b 100644
--- a/PVE/Storage/PBSPlugin.pm
+++ b/PVE/Storage/PBSPlugin.pm
@@ -181,6 +181,56 @@ sub extract_vzdump_config {
      return $config;
  }
+sub prune_backups {
+    my ($class, $scfg, $storeid, $prune_opts, $vmid, $dryrun) = @_;
+
+    my $backups = $class->list_volumes($storeid, $scfg, $vmid, ['backup']);
+
+    my $backup_groups = {};
+    foreach my $backup (@{$backups}) {
+       (my $guest_type = $backup->{format}) =~ s/^pbs-//;
+       my $backup_group = "$guest_type/$backup->{vmid}";
+       $backup_groups->{$backup_group} = 1;
+    }
+
+    my @param;
+    foreach my $prune_option (keys %{$prune_opts}) {
+       push @param, "--$prune_option";
+       push @param, "$prune_opts->{$prune_option}";
+    }
+
+    push @param, '--dry-run' if $dryrun;
+    push @param, '--output-format=json';
+
+    my @prune_list;
+
+    foreach my $backup_group (keys %{$backup_groups}) {
+       my $json_str;
+       my $outfunc = sub { $json_str .= "$_[0]\n" };
+
+       run_raw_client_cmd($scfg, $storeid, 'prune', [ $backup_group, @param ],
+                          outfunc => $outfunc, errmsg => 
'proxmox-backup-client failed');
+
+       my $res = decode_json($json_str);
+       foreach my $backup (@{$res}) {
+           my $type = $backup->{'backup-type'};
+           my $vmid = $backup->{'backup-id'};
+           my $ctime = $backup->{'backup-time'};
+           my $volid = print_volid($storeid, $type, $vmid, $ctime);
+           my $prune_entry = {
+               volid => $volid,
+               type => $type eq 'vm' ? 'qemu' : 'lxc',
+               ctime => $ctime,
+               vmid => $vmid,
+               mark => $backup->{keep} ? 'keep' : 'remove',
+           };
+           push @prune_list, $prune_entry;
+       }
+    }
+
+    return @prune_list;
+}
+
  sub on_add_hook {
      my ($class, $storeid, $scfg, %param) = @_;
diff --git a/PVE/Storage/Plugin.pm b/PVE/Storage/Plugin.pm
index 2351ec8..1008e3a 100644
--- a/PVE/Storage/Plugin.pm
+++ b/PVE/Storage/Plugin.pm
@@ -1166,6 +1166,63 @@ sub check_connection {
      return 1;
  }
+sub prune_backups {
+    my ($class, $scfg, $storeid, $prune_opts, $vmid, $dryrun) = @_;
+
+    my $backups = $class->list_volumes($storeid, $scfg, $vmid, ['backup']);
+
+    my $backup_groups = {};
+    my @prune_list;
+    foreach my $backup (@{$backups}) {
+       my $volid = $backup->{volid};
+       my $backup_vmid = $backup->{vmid};
+
+       my $prune_entry = {
+           volid => $volid,
+           vmid => $backup_vmid,
+           ctime => $backup->{ctime},
+       };
+
+       my $archive_info = eval { PVE::Storage::archive_info($volid) } // {};
+
+       my $type = $archive_info->{type} // 'unknown';
+       $prune_entry->{type} = $type;
+
+       if ($archive_info->{is_std_name}) {
+           $prune_entry->{ctime} = $archive_info->{ctime};
+           my $group = "$type/$backup_vmid";
+           push @{$backup_groups->{$group}}, $prune_entry;
+       } else {
+           # ignore backups that don't use the standard naming scheme
+           $prune_entry->{mark} = 'protected';
+       }
+
+       push @prune_list, $prune_entry;
+    }
+
+    foreach my $backup_group (values %{$backup_groups}) {
+       PVE::Storage::prune_mark_backup_group($backup_group, $prune_opts);
+    }
+
+    if (!$dryrun) {
+       foreach my $prune_entry (@prune_list) {
+           next if $prune_entry->{mark} ne 'remove';
+
+           my $volid = $prune_entry->{volid};
+           eval {
+               my (undef, $volname) = parse_volume_id($volid);
+               my $archive_path = $class->filesystem_path($scfg, $volname);
+               PVE::Storage::archive_remove($archive_path);
+           };
+           if (my $err = $@) {
+               warn "error when removing backup '$volid' - $err\n";
+           }
+       }
+    }
+
+    return @prune_list;
+}
+
  # Import/Export interface:
  #   Any path based storage is assumed to support 'raw' and 'tar' streams, so
  #   the default implementations will return this if $scfg->{path} is set,
diff --git a/test/prune_backups_test.pm b/test/prune_backups_test.pm
new file mode 100644
index 0000000..1ce1b30
--- /dev/null
+++ b/test/prune_backups_test.pm
@@ -0,0 +1,237 @@
+package PVE::Storage::TestPruneBackups;
+
+use strict;
+use warnings;
+
+use lib qw(..);
+
+use PVE::Storage;
+use Test::More;
+use Test::MockModule;
+
+my $storeid = 'BackTest123';
+my @vmids = (1234, 9001);
+
+# only includes the information needed for prune_backups
+my $mocked_backups_list = [];
+
+foreach my $vmid (@vmids) {
+    push @{$mocked_backups_list},
+       (
+           {
+               'volid' => 
"$storeid:backup/vzdump-qemu-$vmid-2018_05_26-11_18_21.tar.zst",
+               'ctime' => 1527333501,
+               'vmid'  => $vmid,
+           },
+           {
+               'volid' => 
"$storeid:backup/vzdump-qemu-$vmid-2019_12_31-11_18_21.tar.zst",
+               'ctime' => 1577791101,
+               'vmid'  => $vmid,
+           },
+           {
+               'volid' => 
"$storeid:backup/vzdump-qemu-$vmid-2019_12_31-11_19_21.tar.zst",
+               'ctime' => 1577787561,
+               'vmid'  => $vmid,
+           },
+           {
+               'volid' => 
"$storeid:backup/vzdump-qemu-$vmid-2020_01_01-11_18_21.tar.zst",
+               'ctime' => 1577877501,
+               'vmid'  => $vmid,
+           },
+           {
+               'volid' => 
"$storeid:backup/vzdump-qemu-$vmid-2020_01_01-12_18_21.tar.zst",
+               'ctime' => 1577881101,
+               'vmid'  => $vmid,
+           },
+           {
+               'volid' => 
"$storeid:backup/vzdump-lxc-$vmid-2020_01_01-12_18_21.tar.zst",
+               'ctime' => 1577881101,
+               'vmid'  => $vmid,
+           },
+           {
+               'volid' => "$storeid:backup/vzdump-$vmid-renamed.tar.zst",
+               'ctime' => 1234,
+               'vmid'  => $vmid,
+           },
+       );
+};
+my $mock_plugin = Test::MockModule->new('PVE::Storage::Plugin');
+$mock_plugin->redefine(list_volumes => sub {
+    my ($class, $storeid, $scfg, $vmid, $content_types) = @_;
+
+    return $mocked_backups_list if !defined($vmid);
+
+    my @list = grep { $_->{vmid} eq $vmid } @{$mocked_backups_list};
+    return \@list;
+});
+
+sub generate_expected {
+    my ($vmids, $marks) = @_;
+
+    my @expected;
+    foreach my $vmid (@{$vmids}) {
+       push @expected,
+           (
+               {
+                   'volid' => 
"$storeid:backup/vzdump-qemu-$vmid-2018_05_26-11_18_21.tar.zst",
+                   'type'  => 'qemu',
+                   'ctime' => 1527333501,

maybe we could have a $time variable with some start value (the highest used 
one) and declare it
in the tests like, for example:

ctime => $time - (12 * 60 * 60),

To make it slightly clearer what different backups times we have, just and idea 
though.


Yes, makes most sense for backups that are relatively close together, but even the difference between "2018_05_26-11_18_21" and "2020_01_01-12_18_21" should become more readable when expressed as such a calculation.

+                   'mark'  => $marks->[0],
+                   'vmid'  => $vmid,
+               },
+               {
+                   'volid' => 
"$storeid:backup/vzdump-qemu-$vmid-2019_12_31-11_18_21.tar.zst",
+                   'type'  => 'qemu',
+                   'ctime' => 1577791101,
+                   'mark'  => $marks->[1],
+                   'vmid'  => $vmid,
+               },
+               {
+                   'volid' => 
"$storeid:backup/vzdump-qemu-$vmid-2019_12_31-11_19_21.tar.zst",
+                   'type'  => 'qemu',
+                   'ctime' => 1577791161,
+                   'mark'  => $marks->[2],
+                   'vmid'  => $vmid,
+               },
+               {
+                   'volid' => 
"$storeid:backup/vzdump-qemu-$vmid-2020_01_01-11_18_21.tar.zst",
+                   'type'  => 'qemu',
+                   'ctime' => 1577877501,
+                   'mark'  => $marks->[3],
+                   'vmid'  => $vmid,
+               },
+               {
+                   'volid' => 
"$storeid:backup/vzdump-qemu-$vmid-2020_01_01-12_18_21.tar.zst",
+                   'type'  => 'qemu',
+                   'ctime' => 1577881101,
+                   'mark'  => $marks->[4],
+                   'vmid'  => $vmid,
+               },
+               {
+                   'volid' => 
"$storeid:backup/vzdump-lxc-$vmid-2020_01_01-12_18_21.tar.zst",
+                   'type'  => 'lxc',
+                   'ctime' => 1577881101,
+                   'mark'  => $marks->[5],
+                   'vmid'  => $vmid,
+               },
+               {
+                   'volid' => "$storeid:backup/vzdump-$vmid-renamed.tar.zst",
+                   'type'  => 'unknown',
+                   'ctime' => 1234,
+                   'mark'  => 'protected',
+                   'vmid'  => $vmid,
+               },
+           );
+    }
+    my @sorted = sort { $a->{volid} cmp $b->{volid} } @expected;
+    return \@sorted;
+}
+
+# an array of test cases, each test is comprised of the following keys:
+# description   => to identify a single test
+# vmid          => VMID or undef for all
+# prune_opts    => override options from storage configuration
+# expected      => what prune_backups should return
+#
+# most of them are created further below
+my $tests = [
+    {
+       description => 'last=3, multiple VMs',
+       prune_opts => {
+           'keep-last' => 3,
+       },
+       expected => generate_expected(\@vmids, ['remove', 'remove', 'keep', 
'keep', 'keep', 'keep']),

Hmm, maybe we want to annotate the protected for the return value, even if it's 
internally
the same as keep. Would allow annotating that in the webinterface/API for 
dry-run.


The 'protected' mark is returned by prune_backups and in the API. It's not passed along to the generate_expected helper, because the mark for the irregular backup is always 'protected', so I hard-coded it inside the helper.

Also, with a mix of CT and VM backups of the same vmid the type is ignored it 
seems?
Could be unexpected - I mean we don't really support that case, so can be OK 
too -
just wondering if you thought about this.


Just as with PBS a backup group is determined by type+ID, so if there are backups for an LXC and a VM with the same ID, prune will treat them as two groups not affecting each other.

There is currently no way to specify the full group when calling prune_backups (only the ID can be passed along), but as you said, it's not supported, and if it ever /is/ needed, that $vmid parameter could be extended.

+    },
+    {
+       description => 'weekly=2, one VM',
+       vmid => $vmids[0],
+       prune_opts => {
+           'keep-weekly' => 2,
+       },
+       expected => generate_expected([$vmids[0]], ['keep', 'remove', 'remove', 
'remove', 'keep', 'keep']),
+    },
+    {
+       description => 'daily=weekly=monthly=1, multiple VMs',
+       prune_opts => {
+           'keep-daily' => 1,
+           'keep-weekly' => 1,
+           'keep-monthly' => 1,
+       },
+       expected => generate_expected(\@vmids, ['keep', 'remove', 'keep', 
'remove', 'keep', 'keep']),
+    },
+    {
+       description => 'hourly=4, one VM',
+       vmid => $vmids[0],
+       prune_opts => {
+           'keep-hourly' => 4,
+       },
+       expected => generate_expected([$vmids[0]], ['keep', 'remove', 'keep', 
'keep', 'keep', 'keep']),
+    },
+    {
+       description => 'yearly=2, multiple VMs',
+       prune_opts => {
+           'keep-yearly' => 2,
+       },
+       expected => generate_expected(\@vmids, ['remove', 'remove', 'keep', 
'remove', 'keep', 'keep']),
+    },
+    {
+       description => 'last=2,hourly=2 one VM',
+       vmid => $vmids[0],
+       prune_opts => {
+           'keep-last' => 2,
+           'keep-hourly' => 2,
+       },
+       expected => generate_expected([$vmids[0]], ['keep', 'remove', 'keep', 
'keep', 'keep', 'keep']),
+    },
+    {
+       description => 'last=1,monthly=2, multiple VMs',
+       prune_opts => {
+           'keep-last' => 1,
+           'keep-monthly' => 2,
+       },
+       expected => generate_expected(\@vmids, ['keep', 'remove', 'keep', 
'remove', 'keep', 'keep']),
+    },
+    {
+       description => 'monthly=3, one VM',
+       vmid => $vmids[0],
+       prune_opts => {
+           'keep-monthly' => 3,
+       },
+       expected => generate_expected([$vmids[0]], ['keep', 'remove', 'keep', 
'remove', 'keep', 'keep']),
+    },
+    {
+       description => 'last=daily=weekly=1, multiple VMs',
+       prune_opts => {
+           'keep-last' => 1,
+           'keep-daily' => 1,
+           'keep-weekly' => 1,
+       },
+       expected => generate_expected(\@vmids, ['keep', 'remove', 'keep', 
'remove', 'keep', 'keep']),
+    },
+    {
+       description => 'daily=2, one VM',
+       vmid => $vmids[0],
+       prune_opts => {
+           'keep-daily' => 2,
+       },
+       expected => generate_expected([$vmids[0]], ['remove', 'remove', 'keep', 
'remove', 'keep', 'keep']),
+    },
+];
+
+plan tests => scalar @$tests;
+
+for my $tt (@$tests) {
+
+    my $got = eval {
+       my @list = PVE::Storage::Plugin->prune_backups($tt->{scfg}, $storeid, 
$tt->{prune_opts}, $tt->{vmid}, 1);
+       my @sorted = sort { $a->{volid} cmp $b->{volid} } @list;
+       return \@sorted;
+    };
+    $got = $@ if $@;
+
+    is_deeply($got, $tt->{expected}, $tt->{description}) || 
diag(explain($got));
+}
+
+done_testing();
+
+1;
diff --git a/test/run_plugin_tests.pl b/test/run_plugin_tests.pl
index 54322bb..d33429a 100755
--- a/test/run_plugin_tests.pl
+++ b/test/run_plugin_tests.pl
@@ -16,6 +16,7 @@ my $res = $harness->runtests(
      "path_to_volume_id_test.pm",
      "get_subdir_test.pm",
      "filesystem_path_test.pm",
+    "prune_backups_test.pm",
  );
exit -1 if !$res || $res->{failed} || $res->{parse_errors};



_______________________________________________
pve-devel mailing list
pve-devel@pve.proxmox.com
https://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

Reply via email to