On March 4, 2025 12:57 pm, Markus Frank wrote:
> add support for sharing directories with a guest vm.
> 
> virtio-fs needs virtiofsd to be started.
> In order to start virtiofsd as a process (despite being a daemon it
> does not run in the background), a double-fork is used.
> 
> virtiofsd should close itself together with QEMU.
> 
> The dirid parameter is required; direct-io, cache and writeback are
> optional. Additionally, the expose-xattr & expose-acl parameters can be
> set to expose xattr & acl settings from the shared filesystem to the
> guest system.
> 
> The dirid gets mapped to the path on the current node and is also used
> as a mount tag (name used to mount the device on the guest).
> 
> example config:
> ```
> virtiofs0: foo,direct-io=1,cache=always,expose-acl=1
> virtiofs1: dirid=bar,cache=never,expose-xattr=1,writeback=1
> ```
> 
> For information on the optional parameters see the corresponding doc
> patch and the official gitlab README:
> https://gitlab.com/virtio-fs/virtiofsd/-/blob/main/README.md
> 
> Also add a permission check for virtiofs directory access.
> 
> Signed-off-by: Markus Frank <m.fr...@proxmox.com>
> ---
> v14:
> * use max_virtiofs() in check_vm_create_dir_perm
> * addressed style nits and improved formatting
> * added missing imports/uses
> * assert_virtiofs_config now only gets the ostype and the virtiofs cfg
> * removed unnecessary checks after parse_property_string
> 
>  PVE/API2/Qemu.pm           |  41 ++++++-
>  PVE/QemuServer.pm          |  29 ++++-
>  PVE/QemuServer/Makefile    |   3 +-
>  PVE/QemuServer/Memory.pm   |  22 ++--
>  PVE/QemuServer/Virtiofs.pm | 211 +++++++++++++++++++++++++++++++++++++
>  5 files changed, 295 insertions(+), 11 deletions(-)
>  create mode 100644 PVE/QemuServer/Virtiofs.pm
> 
> diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
> index 5ac61aa5..7cefffdf 100644
> --- a/PVE/API2/Qemu.pm
> +++ b/PVE/API2/Qemu.pm
> @@ -39,6 +39,7 @@ use PVE::QemuServer::MetaInfo;
>  use PVE::QemuServer::PCI;
>  use PVE::QemuServer::QMPHelpers;
>  use PVE::QemuServer::USB;
> +use PVE::QemuServer::Virtiofs qw(max_virtiofs);
>  use PVE::QemuMigrate;
>  use PVE::RPCEnvironment;
>  use PVE::AccessControl;
> @@ -801,6 +802,33 @@ my sub check_vm_create_hostpci_perm {
>      return 1;
>  };
>  
> +my sub check_dir_perm {
> +    my ($rpcenv, $authuser, $vmid, $pool, $opt, $value) = @_;
> +
> +    return 1 if $authuser eq 'root@pam';
> +
> +    $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
> +
> +    my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', 
> $value);
> +    $rpcenv->check_full($authuser, "/mapping/dir/$virtiofs->{dirid}", 
> ['Mapping.Use']);
> +
> +    return 1;
> +};
> +
> +my sub check_vm_create_dir_perm {
> +    my ($rpcenv, $authuser, $vmid, $pool, $param) = @_;
> +
> +    return 1 if $authuser eq 'root@pam';
> +
> +    for (my $i = 0; $i < max_virtiofs(); $i++) {
> +     my $opt = "virtiofs$i";
> +     next if !$param->{$opt};
> +     check_dir_perm($rpcenv, $authuser, $vmid, $pool, $opt, $param->{$opt});
> +    }
> +
> +    return 1;
> +};
> +
>  my $check_vm_modify_config_perm = sub {
>      my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;
>  
> @@ -811,7 +839,7 @@ my $check_vm_modify_config_perm = sub {
>       # else, as there the permission can be value dependent
>       next if PVE::QemuServer::is_valid_drivename($opt);
>       next if $opt eq 'cdrom';
> -     next if $opt =~ m/^(?:unused|serial|usb|hostpci)\d+$/;
> +     next if $opt =~ m/^(?:unused|serial|usb|hostpci|virtiofs)\d+$/;
>       next if $opt eq 'tags';
>  
>  
> @@ -1114,6 +1142,7 @@ __PACKAGE__->register_method({
>           &$check_vm_create_serial_perm($rpcenv, $authuser, $vmid, $pool, 
> $param);
>           check_vm_create_usb_perm($rpcenv, $authuser, $vmid, $pool, $param);
>           check_vm_create_hostpci_perm($rpcenv, $authuser, $vmid, $pool, 
> $param);
> +         check_vm_create_dir_perm($rpcenv, $authuser, $vmid, $pool, $param);
>  
>           PVE::QemuServer::check_bridge_access($rpcenv, $authuser, $param);
>           &$check_cpu_model_access($rpcenv, $authuser, $param);
> @@ -2005,6 +2034,10 @@ my $update_vm_api  = sub {
>                   check_hostpci_perm($rpcenv, $authuser, $vmid, undef, $opt, 
> $val);
>                   PVE::QemuConfig->add_to_pending_delete($conf, $opt, $force);
>                   PVE::QemuConfig->write_config($vmid, $conf);
> +             } elsif ($opt =~ m/^virtiofs\d$/) {
> +                 check_dir_perm($rpcenv, $authuser, $vmid, undef, $opt, 
> $val);
> +                 PVE::QemuConfig->add_to_pending_delete($conf, $opt, $force);
> +                 PVE::QemuConfig->write_config($vmid, $conf);
>               } elsif ($opt eq 'tags') {
>                   assert_tag_permissions($vmid, $val, '', $rpcenv, $authuser);
>                   delete $conf->{$opt};
> @@ -2095,6 +2128,12 @@ my $update_vm_api  = sub {
>                   }
>                   check_hostpci_perm($rpcenv, $authuser, $vmid, undef, $opt, 
> $param->{$opt});
>                   $conf->{pending}->{$opt} = $param->{$opt};
> +             } elsif ($opt =~ m/^virtiofs\d$/) {
> +                 if (my $oldvalue = $conf->{$opt}) {
> +                     check_dir_perm($rpcenv, $authuser, $vmid, undef, $opt, 
> $oldvalue);
> +                 }
> +                 check_dir_perm($rpcenv, $authuser, $vmid, undef, $opt, 
> $param->{$opt});
> +                 $conf->{pending}->{$opt} = $param->{$opt};
>               } elsif ($opt eq 'tags') {
>                   assert_tag_permissions($vmid, $conf->{$opt}, 
> $param->{$opt}, $rpcenv, $authuser);
>                   $conf->{pending}->{$opt} = 
> PVE::GuestHelpers::get_unique_tags($param->{$opt});
> diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
> index b6fc1f17..748b0acf 100644
> --- a/PVE/QemuServer.pm
> +++ b/PVE/QemuServer.pm
> @@ -33,6 +33,7 @@ use PVE::Exception qw(raise raise_param_exc);
>  use PVE::Format qw(render_duration render_bytes);
>  use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
>  use PVE::HA::Config;
> +use PVE::Mapping::Dir;
>  use PVE::Mapping::PCI;
>  use PVE::Mapping::USB;
>  use PVE::INotify;
> @@ -62,6 +63,7 @@ use PVE::QemuServer::Monitor qw(mon_cmd);
>  use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr 
> print_pcie_root_port parse_hostpci);
>  use PVE::QemuServer::QMPHelpers qw(qemu_deviceadd qemu_devicedel 
> qemu_objectadd qemu_objectdel);
>  use PVE::QemuServer::USB;
> +use PVE::QemuServer::Virtiofs qw(max_virtiofs start_all_virtiofsd);
>  
>  my $have_sdn;
>  eval {
> @@ -948,6 +950,10 @@ my $netdesc = {
>  
>  PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
>  
> +for (my $i = 0; $i < max_virtiofs(); $i++)  {
> +    $confdesc->{"virtiofs$i"} = get_standard_option('pve-qm-virtiofs');
> +}
> +
>  my $ipconfig_fmt = {
>      ip => {
>       type => 'string',
> @@ -3707,8 +3713,18 @@ sub config_to_command {
>       push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, 
> $machine_version, $winversion, $gpu_passthrough);
>      }
>  
> +    my $virtiofs_enabled = 
> PVE::QemuServer::Virtiofs::virtiofs_enabled($conf);
> +
>      PVE::QemuServer::Memory::config(
> -     $conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd);
> +     $conf,
> +     $vmid,
> +     $sockets,
> +     $cores,
> +     $hotplug_features->{memory},
> +     $cmd,
> +     $machineFlags,

machineFlags is only passed to

> +     $virtiofs_enabled
> +    );
>  
>      push @$cmd, '-S' if $conf->{freeze};
>  
> @@ -3998,6 +4014,8 @@ sub config_to_command {
>       push @$machineFlags, 'confidential-guest-support=sev0';
>      }
>  
> +    PVE::QemuServer::Virtiofs::config($conf, $vmid, $devices);
> +
>      push @$cmd, @$devices;
>      push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
>      push @$cmd, '-machine', join(',', @$machineFlags) if 
> scalar(@$machineFlags);
> @@ -5806,6 +5824,8 @@ sub vm_start_nolock {
>       PVE::Tools::run_fork sub {
>           PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", 
> %systemd_properties);
>  
> +         my $virtiofs_sockets = start_all_virtiofsd($conf, $vmid);
> +
>           my $tpmpid;
>           if ((my $tpm = $conf->{tpmstate0}) && 
> !PVE::QemuConfig->is_template($conf)) {
>               # start the TPM emulator so QEMU can connect on start
> @@ -5813,6 +5833,8 @@ sub vm_start_nolock {
>           }
>  
>           my $exitcode = run_command($cmd, %run_params);
> +         eval { 
> PVE::QemuServer::Virtiofs::close_sockets(@$virtiofs_sockets); };
> +         log_warn("closing virtiofs sockets failed - $@") if $@;
>           if ($exitcode) {
>               if ($tpmpid) {
>                   warn "stopping swtpm instance (pid $tpmpid) due to QEMU 
> startup error\n";
> @@ -6482,7 +6504,10 @@ sub check_mapping_access {
>           } else {
>               die "either 'host' or 'mapping' must be set.\n";
>           }
> -       }
> +     } elsif ($opt =~ m/^virtiofs\d$/) {
> +         my $virtiofs = 
> PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $conf->{$opt});
> +         $rpcenv->check_full($user, "/mapping/dir/$virtiofs->{dirid}", 
> ['Mapping.Use']);
> +     }
>     }
>  };
>  
> diff --git a/PVE/QemuServer/Makefile b/PVE/QemuServer/Makefile
> index 18fd13ea..3588e0e1 100644
> --- a/PVE/QemuServer/Makefile
> +++ b/PVE/QemuServer/Makefile
> @@ -11,7 +11,8 @@ SOURCES=PCI.pm              \
>       CPUConfig.pm    \
>       CGroup.pm       \
>       Drive.pm        \
> -     QMPHelpers.pm
> +     QMPHelpers.pm   \
> +     Virtiofs.pm
>  
>  .PHONY: install
>  install: ${SOURCES}
> diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm
> index e5024cd2..dcdb4f76 100644
> --- a/PVE/QemuServer/Memory.pm
> +++ b/PVE/QemuServer/Memory.pm
> @@ -336,7 +336,7 @@ sub qemu_memdevices_list {
>  }
>  
>  sub config {
> -    my ($conf, $vmid, $sockets, $cores, $hotplug, $cmd) = @_;
> +    my ($conf, $vmid, $sockets, $cores, $hotplug, $cmd, $machine_flags, 
> $virtiofs_enabled) = @_;

there's only a single call site, should we maybe order the parameters
that are changed ($cmd and $machine_flags) last?

>  
>      my $memory = get_current_memory($conf->{memory});
>      my $static_memory = 0;
> @@ -379,7 +379,8 @@ sub config {
>           my $numa_memory = $numa->{memory};
>           $numa_totalmemory += $numa_memory;
>  
> -         my $mem_object = print_mem_object($conf, "ram-node$i", 
> $numa_memory);
> +         my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : "ram-node$i";
> +         my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
>  
>           # cpus
>           my $cpulists = $numa->{cpus};
> @@ -404,7 +405,7 @@ sub config {
>           }
>  
>           push @$cmd, '-object', $mem_object;
> -         push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
> +         push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
>       }
>  
>       die "total memory for NUMA nodes must be equal to vm static memory\n"
> @@ -418,15 +419,20 @@ sub config {
>               die "host NUMA node$i doesn't exist\n"
>                   if !host_numanode_exists($i) && $conf->{hugepages};
>  
> -             my $mem_object = print_mem_object($conf, "ram-node$i", 
> $numa_memory);
> -             push @$cmd, '-object', $mem_object;
> -
>               my $cpus = ($cores * $i);
>               $cpus .= "-" . ($cpus + $cores - 1) if $cores > 1;
>  
> -             push @$cmd, '-numa', 
> "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
> +             my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : 
> "ram-node$i";
> +             my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
> +             push @$cmd, '-object', $mem_object;
> +             push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
>           }
>       }
> +    } elsif ($virtiofs_enabled) {
> +     # kvm: '-machine memory-backend' and '-numa memdev' properties are 
> mutually exclusive
> +     push @$cmd, '-object', 'memory-backend-memfd,id=virtiofs-mem'
> +         .",size=$conf->{memory}M,share=on";
> +     push @$machine_flags, 'memory-backend=virtiofs-mem';
>      }
>  
>      if ($hotplug) {
> @@ -453,6 +459,8 @@ sub print_mem_object {
>       my $path = hugepages_mount_path($hugepages_size);
>  
>       return 
> "memory-backend-file,id=$id,size=${size}M,mem-path=$path,share=on,prealloc=yes";
> +    } elsif ($id =~ m/^virtiofs-mem/) {
> +     return "memory-backend-memfd,id=$id,size=${size}M,share=on";
>      } else {
>       return "memory-backend-ram,id=$id,size=${size}M";
>      }
> diff --git a/PVE/QemuServer/Virtiofs.pm b/PVE/QemuServer/Virtiofs.pm
> new file mode 100644
> index 00000000..94ef7b47
> --- /dev/null
> +++ b/PVE/QemuServer/Virtiofs.pm
> @@ -0,0 +1,211 @@
> +package PVE::QemuServer::Virtiofs;
> +
> +use strict;
> +use warnings;
> +
> +use Fcntl qw(F_GETFD F_SETFD FD_CLOEXEC);
> +use IO::Socket::UNIX;
> +use POSIX;
> +use Socket qw(SOCK_STREAM);
> +
> +use PVE::JSONSchema qw(parse_property_string);
> +use PVE::Mapping::Dir;
> +use PVE::QemuServer::Helpers;
> +use PVE::RESTEnvironment qw(log_warn);
> +
> +use base qw(Exporter);
> +
> +our @EXPORT_OK = qw(
> +max_virtiofs
> +start_all_virtiofsd
> +);
> +
> +my $MAX_VIRTIOFS = 10;
> +my $socket_path_root = "/run/qemu-server/virtiofsd";
> +
> +my $virtiofs_fmt = {
> +    'dirid' => {
> +     type => 'string',
> +     default_key => 1,
> +     description => "Mapping identifier of the directory mapping to be 
> shared with the guest."
> +         ." Also used as a mount tag inside the VM.",
> +     format_description => 'mapping-id',
> +     format => 'pve-configid',
> +    },
> +    'cache' => {
> +     type => 'string',
> +     description => "The caching policy the file system should use (auto, 
> always, never).",
> +     enum => [qw(auto always never)],
> +     default => "auto",
> +     optional => 1,
> +    },
> +    'direct-io' => {
> +     type => 'boolean',
> +     description => "Honor the O_DIRECT flag passed down by guest 
> applications.",
> +     default => 0,
> +     optional => 1,
> +    },
> +    writeback => {
> +     type => 'boolean',
> +     description => "Enable writeback cache. If enabled, writes may be 
> cached in the guest until"
> +         ." the file is closed or an fsync is performed.",
> +     default => 0,
> +     optional => 1,
> +    },
> +    'expose-xattr' => {
> +     type => 'boolean',
> +     description => "Enable support for extended attributes for this mount.",
> +     default => 0,
> +     optional => 1,
> +    },
> +    'expose-acl' => {
> +     type => 'boolean',
> +     description => "Enable support for POSIX ACLs (enabled ACL implies 
> xattr) for this mount.",
> +     default => 0,
> +     optional => 1,
> +    },
> +};
> +PVE::JSONSchema::register_format('pve-qm-virtiofs', $virtiofs_fmt);
> +
> +my $virtiofsdesc = {
> +    optional => 1,
> +    type => 'string', format => $virtiofs_fmt,
> +    description => "Configuration for sharing a directory between host and 
> guest using Virtio-fs.",
> +};
> +PVE::JSONSchema::register_standard_option("pve-qm-virtiofs", $virtiofsdesc);
> +
> +sub max_virtiofs {
> +    return $MAX_VIRTIOFS;
> +}
> +
> +sub assert_virtiofs_config {
> +    my ($ostype, $virtiofs) = @_;
> +
> +    my $dir_cfg = 
> PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
> +
> +    my $acl = $virtiofs->{'expose-acl'};
> +    if ($acl && PVE::QemuServer::Helpers::windows_version($ostype)) {
> +     log_warn(
> +         "Please disable ACLs for virtiofs on Windows VMs, otherwise"
> +         ." the virtiofs shared directory cannot be mounted."
> +     );

this is called assert, but here we do a check and just warn if it fails?

> +    }
> +
> +    eval { PVE::Mapping::Dir::assert_valid($dir_cfg) };
> +    die "directory mapping invalid: $@\n" if $@;
> +}
> +
> +sub config {
> +    my ($conf, $vmid, $devices) = @_;
> +
> +    for (my $i = 0; $i < max_virtiofs(); $i++) {
> +     my $opt = "virtiofs$i";
> +
> +     next if !$conf->{$opt};
> +     my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
> +
> +     assert_virtiofs_config($conf->{ostype}, $virtiofs);
> +
> +     push @$devices, '-chardev', 
> "socket,id=virtiofs$i,path=$socket_path_root/vm$vmid-fs$i";
> +
> +     # queue-size is set 1024 because of bug with Windows guests:
> +     # https://bugzilla.redhat.com/show_bug.cgi?id=1873088
> +     # 1024 is also always used in the virtiofs documentations:
> +     # https://gitlab.com/virtio-fs/virtiofsd#examples
> +     push @$devices, '-device', 'vhost-user-fs-pci,queue-size=1024'
> +         .",chardev=virtiofs$i,tag=$virtiofs->{dirid}";
> +    }
> +}
> +
> +sub virtiofs_enabled {
> +    my ($conf) = @_;
> +
> +    my $virtiofs_enabled = 0;
> +    for (my $i = 0; $i < max_virtiofs(); $i++) {
> +     my $opt = "virtiofs$i";
> +     next if !$conf->{$opt};
> +     parse_property_string('pve-qm-virtiofs', $conf->{$opt});
> +     $virtiofs_enabled = 1;
> +    }
> +    return $virtiofs_enabled;
> +}
> +
> +sub start_all_virtiofsd {
> +    my ($conf, $vmid) = @_;
> +    my $virtiofs_sockets = [];
> +    for (my $i = 0; $i < max_virtiofs(); $i++) {
> +     my $opt = "virtiofs$i";
> +
> +     next if !$conf->{$opt};
> +     my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
> +
> +     my $virtiofs_socket = start_virtiofsd($vmid, $i, $virtiofs);
> +     push @$virtiofs_sockets, $virtiofs_socket;
> +    }
> +    return $virtiofs_sockets;
> +}
> +
> +sub start_virtiofsd {
> +    my ($vmid, $fsid, $virtiofs) = @_;
> +
> +    mkdir $socket_path_root;
> +    my $socket_path = "$socket_path_root/vm$vmid-fs$fsid";
> +    unlink($socket_path);
> +    my $socket = IO::Socket::UNIX->new(
> +     Type => SOCK_STREAM,
> +     Local => $socket_path,
> +     Listen => 1,
> +    ) or die "cannot create socket - $!\n";
> +
> +    my $flags = fcntl($socket, F_GETFD, 0)
> +     or die "failed to get file descriptor flags: $!\n";
> +    fcntl($socket, F_SETFD, $flags & ~FD_CLOEXEC)
> +     or die "failed to remove FD_CLOEXEC from file descriptor\n";
> +
> +    my $dir_cfg = 
> PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
> +
> +    my $virtiofsd_bin = '/usr/libexec/virtiofsd';
> +    my $fd = $socket->fileno();
> +    my $path = $dir_cfg->{path};
> +
> +    my $could_not_fork_err = "could not fork to start virtiofsd\n";
> +    my $pid = fork();
> +    if ($pid == 0) {
> +     POSIX::setsid();
> +     $0 = "task pve-vm$vmid-virtiofs$fsid";
> +     my $pid2 = fork();
> +     if ($pid2 == 0) {
> +         my $cmd = [$virtiofsd_bin, "--fd=$fd", "--shared-dir=$path"];
> +         push @$cmd, '--xattr' if $virtiofs->{'expose-xattr'};
> +         push @$cmd, '--posix-acl' if $virtiofs->{'expose-acl'};
> +         push @$cmd, '--announce-submounts' if 
> $dir_cfg->{'announce-submounts'};
> +         push @$cmd, '--allow-direct-io' if $virtiofs->{'direct-io'};
> +         push @$cmd, '--cache='.$virtiofs->{cache} if $virtiofs->{cache};
> +         push @$cmd, '--writeback' if $virtiofs->{'writeback'};
> +         push @$cmd, '--syslog';
> +         exec(@$cmd);
> +     } elsif (!defined($pid2)) {
> +         die $could_not_fork_err;
> +     } else {
> +         POSIX::_exit(0);
> +     }
> +    } elsif (!defined($pid)) {
> +     die $could_not_fork_err;
> +    } else {
> +     waitpid($pid, 0);
> +    }
> +
> +    # return socket to keep it alive,
> +    # so that QEMU will wait for virtiofsd to start
> +    return $socket;
> +}
> +
> +sub close_sockets {
> +    my @sockets = @_;
> +    for my $socket (@sockets) {
> +     shutdown($socket, 2);
> +     close($socket);
> +    }
> +}
> +
> +1;
> -- 
> 2.39.5
> 
> 
> 
> _______________________________________________
> pve-devel mailing list
> pve-devel@lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
> 
> 
> 


_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

Reply via email to