Looking very good! Just some nits and issue with schema description I noticed below:
Am 22.01.25 um 11:08 schrieb Markus Frank: > @@ -801,6 +802,32 @@ my sub check_vm_create_hostpci_perm { > return 1; > }; > > +my sub check_dir_perm { > + my ($rpcenv, $authuser, $vmid, $pool, $opt, $value) = @_; > + > + return 1 if $authuser eq 'root@pam'; > + > + $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']); > + > + my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', > $value); > + $rpcenv->check_full($authuser, "/mapping/dir/$virtiofs->{dirid}", > ['Mapping.Use']); > + > + return 1; > +}; > + > +my sub check_vm_create_dir_perm { > + my ($rpcenv, $authuser, $vmid, $pool, $param) = @_; > + > + return 1 if $authuser eq 'root@pam'; > + > + for my $opt (keys %{$param}) { Nit: sort just to have predictable failure. Or better, iterate over the known virtiofs keys instead like you do elsewhere. > + next if $opt !~ m/^virtiofs\d+$/; > + check_dir_perm($rpcenv, $authuser, $vmid, $pool, $opt, $param->{$opt}); > + } > + > + return 1; > +}; > + ---snip 8<--- > @@ -3703,8 +3709,11 @@ sub config_to_command { > push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, > $machine_version, $winversion, $gpu_passthrough); > } > > + my $virtiofs_enabled = > PVE::QemuServer::Virtiofs::virtiofs_enabled($conf); > + > PVE::QemuServer::Memory::config( > - $conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd); > + $conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd, > + $machineFlags, $virtiofs_enabled); Style nit: I'm afraid it's necessary to use one arguement per line to comply with our style guide now > > push @$cmd, '-S' if $conf->{freeze}; > > @@ -3994,6 +4003,8 @@ sub config_to_command { > push @$machineFlags, 'confidential-guest-support=sev0'; > } > > + PVE::QemuServer::Virtiofs::config($conf, $vmid, $devices); > + > push @$cmd, @$devices; > push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags); > push @$cmd, '-machine', join(',', @$machineFlags) if > scalar(@$machineFlags); > @@ -5792,6 +5803,8 @@ sub vm_start_nolock { > PVE::Tools::run_fork sub { > PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", > %systemd_properties); > > + my $virtiofs_sockets = start_all_virtiofsd($conf, $vmid); > + > my $tpmpid; > if ((my $tpm = $conf->{tpmstate0}) && > !PVE::QemuConfig->is_template($conf)) { > # start the TPM emulator so QEMU can connect on start > @@ -5804,8 +5817,10 @@ sub vm_start_nolock { > warn "stopping swtpm instance (pid $tpmpid) due to QEMU > startup error\n"; > kill 'TERM', $tpmpid; > } > + PVE::QemuServer::Virtiofs::close_sockets(@$virtiofs_sockets); > die "QEMU exited with code $exitcode\n"; > } > + PVE::QemuServer::Virtiofs::close_sockets(@$virtiofs_sockets); Instead of having it twice, could be done slightly above and safeguarded by an eval (should not be critical to fail here IMHO) like my $exitcode = run_command($cmd, %run_params); eval { PVE::QemuServer::Virtiofs::close_sockets(@$virtiofs_sockets); }; log_warn("closing virtiofs sockets failed - $@") if $@; if ($exitcode) { ---snip 8<--- > @@ -418,15 +419,21 @@ sub config { > die "host NUMA node$i doesn't exist\n" > if !host_numanode_exists($i) && $conf->{hugepages}; > > - my $mem_object = print_mem_object($conf, "ram-node$i", > $numa_memory); > - push @$cmd, '-object', $mem_object; > - > my $cpus = ($cores * $i); > $cpus .= "-" . ($cpus + $cores - 1) if $cores > 1; > > - push @$cmd, '-numa', > "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i"; > + my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : > "ram-node$i"; > + my $mem_object = print_mem_object($conf, $memdev, $numa_memory); > + push @$cmd, '-object', $mem_object; > + push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev"; > } > } > + } elsif ($virtiofs_enabled) { > + # kvm: '-machine memory-backend' and '-numa memdev' properties are > + # mutually exclusive Style nit: above comment fits within 100 characters on one line > + push @$cmd, '-object', 'memory-backend-memfd,id=virtiofs-mem' > + .",size=$conf->{memory}M,share=on"; > + push @$machine_flags, 'memory-backend=virtiofs-mem'; > } > > if ($hotplug) { > @@ -453,6 +460,8 @@ sub print_mem_object { > my $path = hugepages_mount_path($hugepages_size); > > return > "memory-backend-file,id=$id,size=${size}M,mem-path=$path,share=on,prealloc=yes"; > + } elsif ($id =~ m/^virtiofs-mem/) { > + return "memory-backend-memfd,id=$id,size=${size}M,share=on"; > } else { > return "memory-backend-ram,id=$id,size=${size}M"; > } > diff --git a/PVE/QemuServer/Virtiofs.pm b/PVE/QemuServer/Virtiofs.pm > new file mode 100644 > index 00000000..e1aabd5a > --- /dev/null > +++ b/PVE/QemuServer/Virtiofs.pm > @@ -0,0 +1,223 @@ > +package PVE::QemuServer::Virtiofs; > + > +use strict; > +use warnings; > + > +use Fcntl qw(F_GETFD F_SETFD FD_CLOEXEC); > +use IO::Socket::UNIX; > +use POSIX; > +use Socket qw(SOCK_STREAM); > + > +use PVE::JSONSchema qw(get_standard_option parse_property_string); Nit: get_standard_option is not used > +use PVE::Mapping::Dir; > +use PVE::RESTEnvironment qw(log_warn); > + missing use PVE::QemuServer::Helpers; for the PVE::QemuServer::Helpers::windows_version() call > +use base qw(Exporter); > + > +our @EXPORT_OK = qw( > +max_virtiofs > +start_all_virtiofsd > +); > + > +my $MAX_VIRTIOFS = 10; > +my $socket_path_root = "/run/qemu-server/virtiofsd"; > + > +my $virtiofs_fmt = { > + 'dirid' => { > + type => 'string', > + default_key => 1, > + description => "Mapping identifier of the directory mapping to be > shared with the guest." > + ." Also used as a mount tag inside the VM.", > + format_description => 'mapping-id', > + format => 'pve-configid', > + }, > + 'cache' => { > + type => 'string', > + description => "The caching policy the file system should use (auto, > always, never).", > + enum => [qw(auto always never)], > + default => "auto", > + optional => 1, > + }, > + 'direct-io' => { > + type => 'boolean', > + description => "Honor the O_DIRECT flag passed down by guest > applications.", > + default => 0, > + optional => 1, > + }, > + writeback => { > + type => 'boolean', > + description => "Enable writeback cache. If enabled, writes may be > cached in the guest until" > + ." the file is closed or an fsync is performed.", > + default => 0, > + optional => 1, > + }, > + 'expose-xattr' => { > + type => 'boolean', > + description => "Overwrite the xattr option from mapping and explicitly > enable/disable" > + ." support for extended attributes for the VM.", The setting is not VM-wide, so s/for the VM/for this mount/ The option doesn't exist for the mapping anymore so should not mention overwrite, right? > + default => 0, > + optional => 1, > + }, > + 'expose-acl' => { > + type => 'boolean', > + description => "Overwrite the acl option from mapping and explicitly > enable/disable support" > + ." for posix ACLs (enabled acl implies xattr) for the VM.", s/posix/POSIX/ s/for the VM/for this mount/ The option doesn't exist for the mapping anymore so should not mention overwrite, right? > + default => 0, > + optional => 1, > + }, > +}; > +PVE::JSONSchema::register_format('pve-qm-virtiofs', $virtiofs_fmt); > + > +my $virtiofsdesc = { > + optional => 1, > + type => 'string', format => $virtiofs_fmt, > + description => "Configuration for sharing a directory between host and > guest using Virtio-fs.", > +}; > +PVE::JSONSchema::register_standard_option("pve-qm-virtiofs", $virtiofsdesc); > + > +sub max_virtiofs { > + return $MAX_VIRTIOFS; > +} > + > +sub assert_virtiofs_config { > + my ($conf, $virtiofs) = @_; Nit: could also pass in just the ostype. That would also avoid potential to confuse $conf with the virtiofs config in this context. > + > + my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}}; > + my $node_list = > PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid}); > + > + my $acl = $virtiofs->{'expose-acl'}; > + if ($acl && PVE::QemuServer::Helpers::windows_version($conf->{ostype})) { > + log_warn( > + "Please disable ACLs for virtiofs on Windows VMs, otherwise" > + ." the virtiofs shared directory cannot be mounted." > + ); > + } > + > + if (!$node_list || scalar($node_list->@*) != 1) { > + die "virtiofs needs exactly one mapping for this node\n"; > + } > + > + eval { PVE::Mapping::Dir::assert_valid($node_list->[0]) }; > + die "directory mapping invalid: $@\n" if $@; > +} > + > +sub config { > + my ($conf, $vmid, $devices) = @_; > + > + for (my $i = 0; $i < max_virtiofs(); $i++) { > + my $opt = "virtiofs$i"; > + > + next if !$conf->{$opt}; > + my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt}); > + next if !$virtiofs; Nit: I'd "die" instead of "next" since this is unexpected, i.e. parse_property_string() was successful so there should be a result. And I don't think you even need it. We don't check this elsewhere either, because the only way parse_property_string() can return something false but not die if there is a custom validator for the format and that one returns something false but not die (which would be a bug with the validator). > + > + assert_virtiofs_config($conf, $virtiofs); > + > + push @$devices, '-chardev', > "socket,id=virtiofs$i,path=$socket_path_root/vm$vmid-fs$i"; > + > + # queue-size is set 1024 because of bug with Windows guests: > + # https://bugzilla.redhat.com/show_bug.cgi?id=1873088 > + # 1024 is also always used in the virtiofs documentations: > + # https://gitlab.com/virtio-fs/virtiofsd#examples > + push @$devices, '-device', 'vhost-user-fs-pci,queue-size=1024' > + .",chardev=virtiofs$i,tag=$virtiofs->{dirid}"; > + } > +} > + > +sub virtiofs_enabled { > + my ($conf) = @_; > + > + my $virtiofs_enabled = 0; > + for (my $i = 0; $i < max_virtiofs(); $i++) { > + my $opt = "virtiofs$i"; > + next if !$conf->{$opt}; > + my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt}); > + if ($virtiofs) { Nit: Similar to above, if we do get a result, it will always evaluate to true. > + $virtiofs_enabled = 1; > + last; > + } > + } > + return $virtiofs_enabled; > +} > + > +sub start_all_virtiofsd { > + my ($conf, $vmid) = @_; > + my $virtiofs_sockets = []; > + for (my $i = 0; $i < max_virtiofs(); $i++) { > + my $opt = "virtiofs$i"; > + > + next if !$conf->{$opt}; > + my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt}); > + next if !$virtiofs; Nit: Similar to above. Or did you ever run into the situation where the result from parse_property_string() would be false? > + > + my $virtiofs_socket = start_virtiofsd($vmid, $i, $virtiofs); > + push @$virtiofs_sockets, $virtiofs_socket; > + } > + return $virtiofs_sockets; > +} > + > +sub start_virtiofsd { > + my ($vmid, $fsid, $virtiofs) = @_; > + > + mkdir $socket_path_root; > + my $socket_path = "$socket_path_root/vm$vmid-fs$fsid"; > + unlink($socket_path); > + my $socket = IO::Socket::UNIX->new( > + Type => SOCK_STREAM, > + Local => $socket_path, > + Listen => 1, > + ) or die "cannot create socket - $!\n"; > + > + my $flags = fcntl($socket, F_GETFD, 0) > + or die "failed to get file descriptor flags: $!\n"; > + fcntl($socket, F_SETFD, $flags & ~FD_CLOEXEC) > + or die "failed to remove FD_CLOEXEC from file descriptor\n"; > + > + my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}}; > + my $node_list = > PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid}); > + my $node_cfg = $node_list->[0]; Nit: this should also check if there is exactly one entry in the result. But since all callers require that, we can make the find_on_current_node() function check this itself and return the single entry directly. > + > + my $virtiofsd_bin = '/usr/libexec/virtiofsd'; > + my $fd = $socket->fileno(); > + my $path = $node_cfg->{path}; > + > + my $could_not_fork_err = "could not fork to start virtiofsd\n"; > + my $pid = fork(); > + if ($pid == 0) { > + setsid(); Is this automatically imported by POSIX? I'd still be explicit here with POSIX::setsid(); > + $0 = "task pve-vm$vmid-virtiofs$fsid"; > + my $pid2 = fork(); > + if ($pid2 == 0) { > + my $cmd = [$virtiofsd_bin, "--fd=$fd", "--shared-dir=$path"]; > + push @$cmd, '--xattr' if $virtiofs->{'expose-xattr'}; > + push @$cmd, '--posix-acl' if $virtiofs->{'expose-acl'}; > + push @$cmd, '--announce-submounts' if > $node_cfg->{'announce-submounts'}; > + push @$cmd, '--allow-direct-io' if $virtiofs->{'direct-io'}; > + push @$cmd, '--cache='.$virtiofs->{cache} if $virtiofs->{cache}; > + push @$cmd, '--writeback' if $virtiofs->{'writeback'}; > + push @$cmd, '--syslog'; > + exec(@$cmd); > + } elsif (!defined($pid2)) { > + die $could_not_fork_err; > + } else { > + POSIX::_exit(0); > + } > + } elsif (!defined($pid)) { > + die $could_not_fork_err; > + } else { > + waitpid($pid, 0); > + } > + > + # return socket to keep it alive, > + # so that QEMU will wait for virtiofsd to start > + return $socket; > +} > + > +sub close_sockets { > + my @sockets = @_; > + for my $socket (@sockets) { > + shutdown($socket, 2); > + close($socket); > + } > +} Style nit: missing blank line here > +1; _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel