On November 7, 2024 5:51 pm, Fiona Ebner wrote: > The first use case is running the container backup subroutine for > external providers inside a user namespace. That allows them to see > the filesystem to back-up from the containers perspective and also > improves security because of isolation. > > Copied and adapted the relevant parts from the pve-buildpkg > repository. > > Originally-by: Wolfgang Bumiller <w.bumil...@proxmox.com> > [FE: add $idmap parameter, drop $aux_groups parameter] > Signed-off-by: Fiona Ebner <f.eb...@proxmox.com> > --- > > New in v3. > > src/Makefile | 1 + > src/PVE/Env.pm | 136 +++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 137 insertions(+) > create mode 100644 src/PVE/Env.pm > > diff --git a/src/Makefile b/src/Makefile > index 2d8bdc4..dba26e3 100644 > --- a/src/Makefile > +++ b/src/Makefile > @@ -15,6 +15,7 @@ LIB_SOURCES = \ > Certificate.pm \ > CpuSet.pm \ > Daemon.pm \ > + Env.pm \ > Exception.pm \ > Format.pm \ > INotify.pm \ > diff --git a/src/PVE/Env.pm b/src/PVE/Env.pm > new file mode 100644 > index 0000000..e11bec0 > --- /dev/null > +++ b/src/PVE/Env.pm > @@ -0,0 +1,136 @@ > +package PVE::Env;
I agree with Thomas that this name might be a bit too generic ;) I also wonder: since this seems to be used only in pve-container, and it really mostly makes sense in that context, wouldn't it be better off there? Or do we expect other areas where we need userns handling? (Granted, some of the comments below would require other changes to pve-common anyway ;)) > + > +use strict; > +use warnings; > + > +use Fcntl qw(O_WRONLY); > +use POSIX qw(EINTR); > +use Socket; > + > +require qw(syscall.ph); PVE::Syscall already does this, and has the following: BEGIN { die "syscall.ph can only be required once!\n" if $INC{'syscall.ph'}; require("syscall.ph"); Don't those two clash? I think those syscall-related parts should probably move there. > + > +use constant {CLONE_NEWNS => 0x00020000, > + CLONE_NEWUSER => 0x10000000}; > + > +sub unshare($) { > + my ($flags) = @_; > + return 0 == syscall(272, $flags); > +} This is PVE::Tools::unshare; maybe the latter should move here? > + > +sub __set_id_map($$$) { > + my ($pid, $what, $value) = @_; > + sysopen(my $fd, "/proc/$pid/${what}_map", O_WRONLY) > + or die "failed to open child process' ${what}_map\n"; > + my $rc = syswrite($fd, $value); > + if (!$rc || $rc != length($value)) { > + die "failed to set sub$what: $!\n"; > + } > + close($fd); > +} > + > +sub set_id_map($$) { > + my ($pid, $id_map) = @_; > + > + my $gid_map = ''; > + my $uid_map = ''; > + > + for my $map ($id_map->@*) { > + my ($type, $ct, $host, $length) = $map->@*; > + > + $gid_map .= "$ct $host $length\n" if $type eq 'g'; > + $uid_map .= "$ct $host $length\n" if $type eq 'u'; > + } > + > + __set_id_map($pid, 'gid', $gid_map) if $gid_map; > + __set_id_map($pid, 'uid', $uid_map) if $uid_map; > +} Do we gain a lot here from not just using newuidmap/newgidmap? > + > +sub wait_for_child($;$) { > + my ($pid, $noerr) = @_; > + my $interrupts = 0; > + while (waitpid($pid, 0) != $pid) { > + if ($! == EINTR) { > + warn "interrupted...\n"; > + kill(($interrupts > 3 ? 
9 : 15), $pid); > + $interrupts++; > + } > + } > + my $status = POSIX::WEXITSTATUS($?); > + return $status if $noerr; > + > + if ($? == -1) { > + die "failed to execute\n"; > + } elsif (POSIX::WIFSIGNALED($?)) { > + my $sig = POSIX::WTERMSIG($?); > + die "got signal $sig\n"; > + } elsif ($status != 0) { > + warn "exit code $status\n"; > + } > + return $status; > +} > + > +sub forked(&%) { This seems very similar to the already existing PVE::Tools::run_fork / run_fork_with_timeout helpers. Any reason we can't extend those with `afterfork` support and use them? > + my ($code, %opts) = @_; > + > + pipe(my $except_r, my $except_w) or die "pipe: $!\n"; > + > + my $pid = fork(); > + die "fork failed: $!\n" if !defined($pid); > + > + if ($pid == 0) { > + close($except_r); > + eval { $code->() }; > + if ($@) { > + print {$except_w} $@; > + $except_w->flush(); > + POSIX::_exit(1); > + } > + POSIX::_exit(0); > + } > + close($except_w); > + > + my $err; > + if (my $afterfork = $opts{afterfork}) { > + eval { $afterfork->($pid); }; > + if ($err = $@) { > + kill(15, $pid); > + $opts{noerr} = 1; > + } > + } > + if (!$err) { > + $err = do { local $/ = undef; <$except_r> }; > + } > + my $rv = wait_for_child($pid, $opts{noerr}); > + die $err if $err; > + die "an unknown error occurred\n" if $rv != 0; > + return $rv; > +} > + > +sub run_in_userns(&;$) { > + my ($code, $id_map) = @_; > + socketpair(my $sp, my $sc, AF_UNIX, SOCK_STREAM, PF_UNSPEC) > + or die "socketpair: $!\n"; > + forked(sub { > + close($sp); > + unshare(CLONE_NEWUSER|CLONE_NEWNS) or die "unshare(NEWUSER|NEWNS): > $!\n"; I guess we can't set our "own" maps here for lack of capabilities and avoid the whole afterfork thing entirely? At least I couldn't get it to work ;) > + syswrite($sc, "1\n") == 2 or die "write: $!\n"; > + shutdown($sc, 1); > + my $two = <$sc>; > + die "failed to sync with parent process\n" if $two ne "2\n"; > + close($sc); > + $! 
= undef; > + ($(, $)) = (0, 0); die "$!\n" if $!; > + ($<, $>) = (0, 0); die "$!\n" if $!; > + $code->(); > + }, afterfork => sub { > + my ($pid) = @_; > + close($sc); > + my $one = <$sp>; > + die "failed to sync with userprocess\n" if $one ne "1\n"; > + set_id_map($pid, $id_map); > + syswrite($sp, "2\n") == 2 or die "write: $!\n"; > + close($sp); > + }); > +} > + > +1; > -- > 2.39.5 > > > > _______________________________________________ > pve-devel mailing list > pve-devel@lists.proxmox.com > https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel > > > _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel