From: Wolfgang Bumiller <>

Signed-off-by: Wolfgang Bumiller <>
 src/PVE/LXC/CGroup.pm  | 128 +++++++++++++++++++++++++++
 src/PVE/LXC/Command.pm | 196 +++++++++++++++++++++++++++++++++++++++++
 src/PVE/LXC/Makefile   |   2 +
 3 files changed, 326 insertions(+)
 create mode 100644 src/PVE/LXC/CGroup.pm
 create mode 100644 src/PVE/LXC/Command.pm

diff --git a/src/PVE/LXC/CGroup.pm b/src/PVE/LXC/CGroup.pm
new file mode 100644
index 0000000..7561fb2
--- /dev/null
+++ b/src/PVE/LXC/CGroup.pm
@@ -0,0 +1,128 @@
+# cgroup handler
+# This package should deal with figuring out the right cgroup path for a
+# container (via the command socket), reading and writing cgroup values, and
+# handling cgroup v1 & v2 differences.
+# Note that the long term plan is to have resource management functions instead
+# of dealing with cgroup files on the outside.
+package PVE::LXC::CGroup;
+use strict;
+use warnings;
+use PVE::LXC::Command;
+# We don't want to do a command socket round trip for every cgroup read/write,
+# so any cgroup function needs to have the container's path cached, so this
+# package has to be instantiated.
+# LXC keeps separate paths by controller (although they're normally all the
+# same, in our case anyway), so we cache them by controller as well.
+sub new {
+    my ($class, $vmid) = @_;
+    my $self = { vmid => $vmid };
+    return bless $self, $class;
+my $CPUSET_BASE = undef;
+# Find the cpuset cgroup controller.
+# This is a function, not a method!
+sub cpuset_controller_path() {
+    if (!defined($CPUSET_BASE)) {
+       my $CPUSET_PATHS = [
+           # legacy cpuset cgroup:
+           ['/sys/fs/cgroup/cpuset',  'cpuset.effective_cpus'],
+           # pure cgroupv2 environment:
+           ['/sys/fs/cgroup',         'cpuset.cpus.effective'],
+           # hybrid, with cpuset moved to cgroupv2
+           ['/sys/fs/cgroup/unified', 'cpuset.cpus.effective'],
+       ];
+       my ($result) = grep { -f "$_->[0]/$_->[1]" } @$CPUSET_PATHS;
+       die "failed to find cpuset controller\n" if !defined($result);
+       $CPUSET_BASE = $result->[0];
+    }
+    return $CPUSET_BASE;
+my $CGROUP_MODE = undef;
+# Figure out which cgroup mode we're operating under:
+# Returns 1 if cgroupv1 controllers exist (hybrid or legacy mode), and 2 in a
+# cgroupv2-only environment.
+# This is a function, not a method!
+sub cgroup_mode() {
+    if (!defined($CGROUP_MODE)) {
+       my ($v1, $v2) = PVE::LXC::get_cgroup_subsystems();
+       if (keys %$v1) {
+           # hybrid or legacy mode
+           $CGROUP_MODE = 1;
+       } elsif ($v2) {
+           $CGROUP_MODE = 2;
+       }
+    }
+    die "unknown cgroup mode\n" if !defined($CGROUP_MODE);
+    return $CGROUP_MODE;
+# Get a subdirectory (without the cgroup mount point) for a controller.
+# If `$controller` is `undef`, get the unified (cgroupv2) path.
+# Note that in cgroup v2, lxc uses the activated controller names
+# (`cgroup.controllers` file) as list of controllers for the unified hierarchy,
+# so this returns a result when a `controller` is provided even when using
+# a pure cgroupv2 setup.
+my sub get_subdir {
+    my ($self, $controller, $limiting) = @_;
+    my $entry_name = $controller || 'unified';
+    my $entry = ($self->{controllers}->{$entry_name} //= {});
+    my $kind = $limiting ? 'limit' : 'ns';
+    my $path = $entry->{$kind};
+    return $path if defined $path;
+    $path = PVE::LXC::Command::get_cgroup_path(
+       $self->{vmid},
+       $controller,
+       $limiting,
+    ) or return undef;
+    # untaint:
+    if ($path =~ /\.\./) {
+       die "lxc returned suspicious path: '$path'\n";
+    }
+    ($path) = ($path =~ /^(.*)$/s);
+    $entry->{$kind} = $path;
+    return $path;
+# Get a path for a controller.
+# `$controller` may be `undef`, see get_subdir above for details.
+sub get_path {
+    my ($self, $controller) = @_;
+    my $path = get_subdir($self, $controller)
+       or return undef;
+    # We currently assume the main mount point to be in a standard location.
+    return "/sys/fs/cgroup/$path" if cgroup_mode() == 2;
+    return "/sys/fs/cgroup/unified/$path" if !defined($controller);
+    return "/sys/fs/cgroup/$controller/$path";
diff --git a/src/PVE/LXC/Command.pm b/src/PVE/LXC/Command.pm
new file mode 100644
index 0000000..2fd4e81
--- /dev/null
+++ b/src/PVE/LXC/Command.pm
@@ -0,0 +1,196 @@
+# LXC command socket client.
+# For now this is only used to fetch the cgroup paths.
+# This can also be extended to replace a few more `lxc-*` CLI invocations
+# (such as lxc-stop, info, freeze, unfreeze, or getting the init pid).
+package PVE::LXC::Command;
+use strict;
+use warnings;
+use IO::Socket::UNIX;
+use base 'Exporter';
+use constant {
+our @EXPORT_OK = qw(
+    raw_command_transaction
+    simple_command
+    get_cgroup_path
+# Get the command socket for a container.
+my sub _get_command_socket($) {
+    my ($vmid) = @_;
+    my $sock = IO::Socket::UNIX->new(
+       Type => SOCK_STREAM(),
+       Peer => "\0/var/lib/lxc/$vmid/command",
+    );
+    if (!defined($sock)) {
+       return undef if $!{ECONNREFUSED};
+       die "failed to connect to command socket: $!\n";
+    }
+    # The documentation for this talks more about the receiving end, and it
+    # also *mostly* works without it, but then the kernel *sometimes* fails to
+    # provide correct credentials.
+    setsockopt($sock, SOL_SOCKET, SO_PASSCRED, 1)
+        or die "failed to pass credentials to command socket: $!\n";
+    return $sock;
+# Create an lxc_cmd_req struct.
+my sub _lxc_cmd_req($$) {
+    my ($cmd, $datalen) = @_;
+    # struct lxc_cmd_req {
+    #     lxc_cmd_t cmd;
+    #     int datalen;
+    #     const void *data;
+    # };
+    #
+    # Obviously the pointer makes no sense in the payload so we just use NULL.
+    my $packet = pack('i!i!L!', $cmd, $datalen, 0);
+    return $packet;
+# Unpack an lxc_cmd_rsp into its return value and payload length.
+my sub _unpack_lxc_cmd_rsp($) {
+    my ($packet) = @_;
+    #struct lxc_cmd_rsp {
+    #    int ret; /* 0 on success, -errno on failure */
+    #    int datalen;
+    #    void *data;
+    #};
+    # We drop the pointless pointer value.
+    my ($ret, $len, undef) = unpack("i!i!L!", $packet);
+    return ($ret, $len);
+# Send a complete packet:
+my sub _do_send($$) {
+    my ($sock, $data) = @_;
+    my $sent = send($sock, $data, 0)
+       // die "failed to send to command socket: $!\n";
+    die "short write on command socket ($sent != ".length($data).")\n"
+       if $sent != length($data);
+# Receive a complete packet:
+my sub _do_recv($\$$) {
+    my ($sock, $scalar, $len) = @_;
+    my $got = recv($sock, $$scalar, $len, 0)
+       // die "failed to read from command socket: $!\n";
+    die "short read on command socket ($len != ".length($$scalar).")\n"
+       if length($$scalar) != $len;
+# Receive a response from an lxc command socket.
+# Performs the return value check (negative errno values) and returns the
+# return value and payload in array context, or just the payload in scalar
+# context.
+my sub _recv_response($) {
+    my ($socket) = @_;
+    my $buf = pack('i!i!L!', 0, 0, 0); # struct lxc_cmd_rsp
+    _do_recv($socket, $buf, length($buf));
+    my ($res, $datalen) = _unpack_lxc_cmd_rsp($buf);
+    my $data;
+    _do_recv($socket, $data, $datalen)
+       if $datalen > 0;
+    if ($res < 0) {
+       $! = -$res;
+       die "command failed: $!\n";
+    }
+    return wantarray ? ($res, $data) : $data;
+# Perform a command transaction: Send command & payload, receive and unpack the
+# response.
+sub raw_command_transaction($$;$) {
+    my ($socket, $cmd, $data) = @_;
+    $data //= '';
+    my $req = _lxc_cmd_req(LXC_CMD_GET_CGROUP, length($data));
+    _do_send($socket, $req);
+    if (length($data) > 0) {
+       _do_send($socket, $data);
+    }
+    return _recv_response($socket);
+# Perform a command transaction for a VMID where no command socket has been
+# established yet.
+# Returns ($ret, $data):
+#    $ret: numeric return value (typically 0)
+#    $data: optional data returned for the command, if any, otherwise undef
+# Returns undef if the container is not running, dies on errors.
+sub simple_command($$;$) {
+    my ($vmid, $cmd, $data) = @_;
+    my $socket = _get_command_socket($vmid)
+       or return undef;
+    return raw_command_transaction($socket, $cmd, $data);
+# Retrieve the cgroup path for a running container.
+# If $limiting is set, get the payload path without the namespace subdirectory,
+# otherwise return the full namespaced path.
+# Returns undef if the container is not running, dies on errors.
+sub get_cgroup_path($;$$) {
+    my ($vmid, $subsystem, $limiting) = @_;
+    # subsystem name must be a zero-terminated C string.
+    my ($res, $data) = simple_command(
+       $vmid,
+       pack('Z*', $subsystem),
+    );
+    return undef if !defined $res;
+    # data is a zero-terminated string:
+    return unpack('Z*', $data);
+# Retrieve the limiting cgroup path for a running container.
+# This variant takes no `$limiting` parameter; it is meant to return the
+# payload path without the namespace subdirectory.
+# Returns undef if the container is not running, dies on errors.
+sub get_limiting_cgroup_path($;$) {
+    my ($vmid, $subsystem) = @_;
+    # subsystem name must be a zero-terminated C string.
+    my ($res, $data) = simple_command(
+       $vmid,
+       pack('Z*', $subsystem),
+    );
+    return undef if !defined $res;
+    # data is a zero-terminated string:
+    return unpack('Z*', $data);
diff --git a/src/PVE/LXC/Makefile b/src/PVE/LXC/Makefile
index d889204..f4f4dc1 100644
--- a/src/PVE/LXC/Makefile
+++ b/src/PVE/LXC/Makefile
@@ -1,4 +1,6 @@
+ \
+ \ \ \ \

pve-devel mailing list

Reply via email to