For '-drive', qemu-server sets special cache options for EFI disk using RBD. In preparation to seamlessly switch to the new '-blockdev' interface, do the same here. Note that the issue from bug #3329, which is solved by these cache options, still affects current versions.
With -blockdev, the cache options are split up. While cache.direct and cache.no-flush can be set in the -blockdev options, cache.writeback is a front-end property and was intentionally removed from the -blockdev options by QEMU commit aaa436f998 ("block: Remove cache.writeback from blockdev-add"). It needs to be configured as the 'write-cache' property for the ide-hd/scsi-hd/virtio-blk device. The default is already 'writeback' and no cache mode can be set for an EFI drive configuration in Proxmox VE currently, so there will not be a clash. ┌─────────────┬─────────────────┬──────────────┬────────────────┐ │ │ cache.writeback │ cache.direct │ cache.no-flush │ ├─────────────┼─────────────────┼──────────────┼────────────────┤ │writeback │ on │ off │ off │ ├─────────────┼─────────────────┼──────────────┼────────────────┤ │none │ on │ on │ off │ ├─────────────┼─────────────────┼──────────────┼────────────────┤ │writethrough │ off │ off │ off │ ├─────────────┼─────────────────┼──────────────┼────────────────┤ │directsync │ off │ on │ off │ ├─────────────┼─────────────────┼──────────────┼────────────────┤ │unsafe │ on │ off │ on │ └─────────────┴─────────────────┴──────────────┴────────────────┘ Table from 'man kvm'. Alternatively, the option could only be set once when allocating the RBD volume. However, then we would need to detect all cases were a volume could potentially be used as an EFI disk later. Having a custom disk type would help a lot there. The approach here was chosen as it is catch-all and should not be too costly either. Signed-off-by: Fiona Ebner <f.eb...@proxmox.com> --- src/PVE/Storage.pm | 4 ++-- src/PVE/Storage/ISCSIDirectPlugin.pm | 2 +- src/PVE/Storage/Plugin.pm | 23 +++++++++++++++++++++-- src/PVE/Storage/RBDPlugin.pm | 20 +++++++++++++++++++- src/PVE/Storage/ZFSPlugin.pm | 2 +- src/PVE/Storage/ZFSPoolPlugin.pm | 2 +- 6 files changed, 45 insertions(+), 8 deletions(-) diff --git a/src/PVE/Storage.pm b/src/PVE/Storage.pm index ec8b753..4920bd6 100755 --- a/src/PVE/Storage.pm +++ b/src/PVE/Storage.pm @@ -721,7 +721,7 @@ sub abs_filesystem_path { # see the documentation for the plugin method sub qemu_blockdev_options { - my ($cfg, $volid) = @_; + my ($cfg, $volid, $options) = @_; my ($storeid, $volname) = parse_volume_id($volid); @@ -733,7 +733,7 @@ sub qemu_blockdev_options { die "cannot use volume of type '$vtype' as a QEMU blockdevice\n" if $vtype ne 'images' && $vtype ne 'iso' && $vtype ne 'import'; - return $plugin->qemu_blockdev_options($scfg, $storeid, $volname); + return $plugin->qemu_blockdev_options($scfg, $storeid, $volname, $options); } # used as last resort to adapt volnames when migrating diff --git a/src/PVE/Storage/ISCSIDirectPlugin.pm b/src/PVE/Storage/ISCSIDirectPlugin.pm index 8c6b4ab..12b894d 100644 --- a/src/PVE/Storage/ISCSIDirectPlugin.pm +++ b/src/PVE/Storage/ISCSIDirectPlugin.pm @@ -111,7 +111,7 @@ sub path { } sub qemu_blockdev_options { - my ($class, $scfg, $storeid, $volname) = @_; + my ($class, $scfg, $storeid, $volname, $options) = @_; my $lun = ($class->parse_volname($volname))[1]; diff --git a/src/PVE/Storage/Plugin.pm b/src/PVE/Storage/Plugin.pm index 680bb6b..4ec4a0d 100644 --- a/src/PVE/Storage/Plugin.pm +++ b/src/PVE/Storage/Plugin.pm @@ -1965,7 +1965,7 @@ sub rename_volume { =head3 qemu_blockdev_options - $blockdev = $plugin->qemu_blockdev_options($scfg, $storeid, $volname) + $blockdev = $plugin->qemu_blockdev_options($scfg, $storeid, $volname, $options) Returns a hash reference with the basic options needed to open the volume via QEMU's C<-blockdev> API. This at least requires a C<< $blockdev->{driver} >> and a reference to the image, e.g. @@ -1992,12 +1992,31 @@ Arguments: =item C<$volume>: The volume name. +=item C<$options>: A hash reference with additional options. + +=over + +=item C<< $options->{hints} >>: A hash reference with hints indicating what the volume will be used +for. This can be safely ignored if no concrete issues are known with your plugin. For certain use +cases, setting additional (plugin-specific) options might be very beneficial however. An example is +setting the correct cache options for an EFI disk on RBD. The list of hints might get expanded in +the future. + +=over + +=item C<< $options->{hints}->{'efi-disk'} >>: (optional) If set, the volume will be used as the EFI +disk of a VM, containing its OMVF variables. + +=back + +=back + =back =cut sub qemu_blockdev_options { - my ($class, $scfg, $storeid, $volname) = @_; + my ($class, $scfg, $storeid, $volname, $options) = @_; my $blockdev = {}; diff --git a/src/PVE/Storage/RBDPlugin.pm b/src/PVE/Storage/RBDPlugin.pm index 6e70a19..c40c845 100644 --- a/src/PVE/Storage/RBDPlugin.pm +++ b/src/PVE/Storage/RBDPlugin.pm @@ -374,6 +374,16 @@ my sub rbd_volume_exists { return 0; } +# Needs to be public, so qemu-server can mock it for cfg2cmd. +sub rbd_volume_config_set { + my ($scfg, $storeid, $volname, $key, $value) = @_; + + my $cmd = $rbd_cmd->($scfg, $storeid, 'config', 'image', 'set', $volname, $key, $value); + run_rbd_command($cmd, errmsg => "rbd config image set $volname $key $value error"); + + return; +} + # Configuration sub type { @@ -514,7 +524,7 @@ sub path { } sub qemu_blockdev_options { - my ($class, $scfg, $storeid, $volname) = @_; + my ($class, $scfg, $storeid, $volname, $options) = @_; my $cmd_option = PVE::CephConfig::ceph_connect_option($scfg, $storeid); my ($name) = ($class->parse_volname($volname))[1]; @@ -547,6 +557,14 @@ sub qemu_blockdev_options { $blockdev->{user} = "$cmd_option->{userid}" if $cmd_option->{keyring}; + # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329 + if ($options->{hints}->{'efi-disk'}) { + # Querying the value would just cost more and the 'rbd image config get' command will just + # fail if the config has not been set yet, so it's not even straight-forward to do so. + # Simply set the value (possibly again). + rbd_volume_config_set($scfg, $storeid, $name, 'rbd_cache_policy', 'writeback'); + } + return $blockdev; } diff --git a/src/PVE/Storage/ZFSPlugin.pm b/src/PVE/Storage/ZFSPlugin.pm index c03fcca..0f64898 100644 --- a/src/PVE/Storage/ZFSPlugin.pm +++ b/src/PVE/Storage/ZFSPlugin.pm @@ -248,7 +248,7 @@ sub path { } sub qemu_blockdev_options { - my ($class, $scfg, $storeid, $volname) = @_; + my ($class, $scfg, $storeid, $volname, $options) = @_; my $name = ($class->parse_volname($volname))[1]; my $guid = $class->zfs_get_lu_name($scfg, $name); diff --git a/src/PVE/Storage/ZFSPoolPlugin.pm b/src/PVE/Storage/ZFSPoolPlugin.pm index 86c52ea..e775dae 100644 --- a/src/PVE/Storage/ZFSPoolPlugin.pm +++ b/src/PVE/Storage/ZFSPoolPlugin.pm @@ -163,7 +163,7 @@ sub path { } sub qemu_blockdev_options { - my ($class, $scfg, $storeid, $volname) = @_; + my ($class, $scfg, $storeid, $volname, $options) = @_; my $format = ($class->parse_volname($volname))[6]; -- 2.47.2 _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel