On June 12, 2025 4:02 pm, Fiona Ebner wrote: > The drive device and node structure is: > front-end device {ide-hd,scsi-hd,virtio-blk-pci} (id=$drive_id) > - throttle node (node-name=$drive_id)
should we prefix this is well to not "use" the basic name (e.g., in case in the future a new "topmost" node comes along)? > - format node (node-name=f$encoded-info) > - file node (node-name=e$encoded-info) > > The node-name can only be 31 characters long and needs to start with a > letter. The throttle node will stay inserted below the front-end > device. The other nodes might change however, because of drive > mirroring and similar. There currently are no good helpers to > query/walk the block graph via QMP, x-debug-query-block-graph is > experimental and for debugging only. Therefore, necessary information > is encoded in the node name to be able to find it again. In > particular, this is the volume ID, the drive ID and optionally a > snapshot name. As long as the configuration file matches with the > running instance, this is enough to find the correct node for > block operations like mirror and resize. > > The 'snapshot' option, for QEMU's snapshot mode, i.e. writes are only > temporary, is not yet supported. > > Originally-by: Alexandre Derumier <alexandre.derum...@groupe-cyllene.com> > [FE: split up patch > expand commit message > explicitly test for drivers with aio setting > improve readonly handling > improve CD-ROM handling > fix failure for storage named 'nbd' by always using full regex > improve node name generation > fail when drive->{snapshot} is set] > Signed-off-by: Fiona Ebner <f.eb...@proxmox.com> > --- > > If we do not get around to implement the 'snapshot' option in time for > PVE 9, we can still fall back to the legacy drive for that (and warn > users that not all operations might work with such drives). > > PVE/QemuServer/Blockdev.pm | 172 ++++++++++++++++++++++++++++++++++++- > PVE/QemuServer/Makefile | 1 + > 2 files changed, 172 insertions(+), 1 deletion(-) > > diff --git a/PVE/QemuServer/Blockdev.pm b/PVE/QemuServer/Blockdev.pm > index b1150141..2d3760f0 100644 > --- a/PVE/QemuServer/Blockdev.pm > +++ b/PVE/QemuServer/Blockdev.pm > @@ -3,7 +3,42 @@ package PVE::QemuServer::Blockdev; > use strict; > use warnings; > > -use PVE::QemuServer::Drive; > +use Digest::SHA; > +use Fcntl qw(S_ISBLK S_ISCHR); > +use File::stat; > +use JSON; > + > +use PVE::Storage; > + > +use PVE::QemuServer::Drive qw(drive_is_cdrom); > + > +my sub get_node_name { > + my ($type, $drive_id, $volid, $snap) = @_; > + > + my $info = "drive=$drive_id,"; > + $info .= "snap=$snap," if defined($snap); > + $info .= "volid=$volid"; > + > + my $hash = substr(Digest::SHA::sha256_hex($info), 0, 30); > + > + my $prefix = ""; > + if ($type eq 'fmt') { > + $prefix = 'f'; > + } elsif ($type eq 'file') { > + $prefix = 'e'; > + } else { > + die "unknown node type '$type'"; > + } > + # node-name must start with an alphabetical character > + return "${prefix}${hash}"; > +} > + > +my sub read_only_json_option { > + my ($drive, $options) = @_; > + > + return JSON::true if $drive->{ro} || drive_is_cdrom($drive) || > $options->{'read-only'}; > + return JSON::false; should we maybe have a generic jsonify-helper instead? this is only called twice, but we have (counting this as well) three call sites here that basically do foo ? JSON::true : JSON::false which could become `json_bool(foo)` with a few more in PVE::VZDump::QemuServer and other qemu-server modules.. we could still have a drive_is_ro helper in any case ;) > +} > > sub generate_throttle_group { > my ($drive) = @_; > @@ -41,4 +76,139 @@ sub generate_throttle_group { > }; > } > > +sub generate_blockdev_drive_cache { > + my ($drive, $scfg) = @_; > + > + my $cache_direct = > PVE::QemuServer::Drive::drive_uses_cache_direct($drive, $scfg); > + my $cache = {}; > + $cache->{direct} = $cache_direct ? JSON::true : JSON::false; > + $cache->{'no-flush'} = $drive->{cache} && $drive->{cache} eq 'unsafe' ? > JSON::true : JSON::false; > + return $cache; > +} > + > +sub generate_file_blockdev { > + my ($storecfg, $drive, $options) = @_; > + > + my $blockdev = {}; > + my $scfg = undef; > + > + die "generate_file_blockdev called without volid/path\n" if > !$drive->{file}; > + die "generate_file_blockdev called with 'none'\n" if $drive->{file} eq > 'none'; > + # FIXME use overlay and new config option to define storage for temp > write device > + die "'snapshot' option is not yet supported for '-blockdev'\n" if > $drive->{snapshot}; > + > + my $drive_id = PVE::QemuServer::Drive::get_drive_id($drive); > + > + if ($drive->{file} eq 'cdrom') { > + my $path = PVE::QemuServer::Drive::get_iso_path($storecfg, > $drive->{file}); > + $blockdev = { driver => 'host_cdrom', filename => $path }; > + } elsif ($drive->{file} =~ m|^/|) { > + my $path = $drive->{file}; > + # The 'file' driver only works for regular files. The check below is > taken from > + # block/file-posix.c:hdev_probe_device() in QEMU. To detect CD-ROM host > devices, QEMU issues > + # an ioctl, while the code here relies on the media=cdrom flag instead. > + my $st = File::stat::stat($path) or die "stat for '$path' failed - > $!\n"; > + my $driver = 'file'; > + if (S_ISCHR($st->mode) || S_ISBLK($st->mode)) { > + $driver = drive_is_cdrom($drive) ? 'host_cdrom' : 'host_device'; > + } > + $blockdev = { driver => $driver, filename => $path }; > + } else { > + my $volid = $drive->{file}; > + my ($storeid) = PVE::Storage::parse_volume_id($volid); > + > + my $vtype = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[0]; > + die "$drive_id: explicit media parameter is required for iso images\n" > + if !defined($drive->{media}) && defined($vtype) && $vtype eq 'iso'; > + > + my $storage_opts = { hints => {} }; > + $storage_opts->{hints}->{'efi-disk'} = 1 if $drive->{interface} eq > 'efidisk'; > + $storage_opts->{'snapshot-name'} = $options->{'snapshot-name'} > + if defined($options->{'snapshot-name'}); > + $blockdev = PVE::Storage::qemu_blockdev_options($storecfg, $volid, > $storage_opts); > + $scfg = PVE::Storage::storage_config($storecfg, $storeid); > + } > + > + $blockdev->{cache} = generate_blockdev_drive_cache($drive, $scfg); > + > + my $driver = $blockdev->{driver}; > + # only certain drivers have the aio setting > + if ($driver eq 'file' || $driver eq 'host_cdrom' || $driver eq > 'host_device') { > + $blockdev->{aio} = PVE::QemuServer::Drive::aio_cmdline_option( > + $scfg, $drive, $blockdev->{cache}->{direct}); > + } > + > + if (!drive_is_cdrom($drive)) { > + $blockdev->{discard} = $drive->{discard} && $drive->{discard} eq 'on' ? > 'unmap' : 'ignore'; > + $blockdev->{'detect-zeroes'} = > PVE::QemuServer::Drive::detect_zeroes_cmdline_option($drive); > + } > + > + $blockdev->{'node-name'} = get_node_name( > + 'file', $drive_id, $drive->{file}, $options->{'snapshot-name'}); > + > + $blockdev->{'read-only'} = read_only_json_option($drive, $options); > + > + return $blockdev; > +} > + > +sub generate_format_blockdev { > + my ($storecfg, $drive, $child, $options) = @_; > + > + die "generate_format_blockdev called without volid/path\n" if > !$drive->{file}; > + die "generate_format_blockdev called with 'none'\n" if $drive->{file} eq > 'none'; > + > + my $scfg; > + my $format; > + my $volid = $drive->{file}; > + my $drive_id = PVE::QemuServer::Drive::get_drive_id($drive); > + my ($storeid) = PVE::Storage::parse_volume_id($volid, 1); > + > + # For PVE-managed volumes, use the format from the storage layer and > prevent overrides via the > + # drive's 'format' option. For unmanaged volumes, fallback to 'raw' to > avoid auto-detection by > + # QEMU. > + if ($storeid) { > + $scfg = PVE::Storage::storage_config($storecfg, $storeid); > + $format = PVE::QemuServer::Drive::checked_volume_format($storecfg, > $volid); > + if ($drive->{format} && $drive->{format} ne $format) { > + die "drive '$drive->{interface}$drive->{index}' - volume '$volid'" > + ." - 'format=$drive->{format}' option different from storage > format '$format'\n"; > + } > + } else { > + $format = $drive->{format} // 'raw'; > + } > + > + # define cache option on both format && file node like libvirt does > + my $cache = generate_blockdev_drive_cache($drive, $scfg); > + > + my $node_name = get_node_name('fmt', $drive_id, $drive->{file}, > $options->{'snapshot-name'}); > + > + return { > + 'node-name' => "$node_name", > + driver => "$format", > + file => $child, > + cache => $cache, > + 'read-only' => read_only_json_option($drive, $options), > + }; > +} > + > +sub generate_drive_blockdev { > + my ($storecfg, $drive, $options) = @_; > + > + my $drive_id = PVE::QemuServer::Drive::get_drive_id($drive); > + > + die "generate_drive_blockdev called without volid/path\n" if > !$drive->{file}; > + die "generate_drive_blockdev called with 'none'\n" if $drive->{file} eq > 'none'; > + > + my $child = generate_file_blockdev($storecfg, $drive, $options); > + $child = generate_format_blockdev($storecfg, $drive, $child, $options); > + > + # this is the top filter entry point, use $drive-drive_id as nodename > + return { > + driver => "throttle", > + 'node-name' => "drive-$drive_id", > + 'throttle-group' => "throttle-drive-$drive_id", > + file => $child, > + }; > +} > + > 1; > diff --git a/PVE/QemuServer/Makefile b/PVE/QemuServer/Makefile > index 8054a93b..e3671e5b 100644 > --- a/PVE/QemuServer/Makefile > +++ b/PVE/QemuServer/Makefile > @@ -12,6 +12,7 @@ SOURCES=PCI.pm \ > MetaInfo.pm \ > CPUConfig.pm \ > CGroup.pm \ > + Blockdev.pm \ already added by the previous patch ;) should we sort this list? > Drive.pm \ > QMPHelpers.pm \ > StateFile.pm \ > -- > 2.39.5 > > > > _______________________________________________ > pve-devel mailing list > pve-devel@lists.proxmox.com > https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel > > > _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel