Dear All,

We have set up scrun (Slurm 23.11.5) integrated with rootless Podman, following
the official documentation:
https://slurm.schedmd.com/containers.html#podman-scrun
https://slurm.schedmd.com/scrun.html#SECTION_Example-%3CB%3Escrun.lua%3C/B%3E-scripts

However, both runc and crun print an error message and fail to run the
container. Judging from the runc error message, there seems to be a problem
with the GID mapping configuration.
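
For context, our uid/gid on the host is 3002 (see the podman info output
below), so the group map that slurm_scrun_stage_in writes into the generated
config.json should reduce to a single entry; substituting our values by hand
into the scrun.lua assignment (for illustration only, not captured from the
actual config.json) gives:

  config["linux"]["gidMappings"] = {{containerID=0, hostID=3002, size=1}}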

- runc
  [hoge@slm-master hoge]$ podman run centos echo "Hello World"
  time="2024-04-03T02:39:36Z" level=error msg="runc run failed: cannot specify 
gid=5 mount option for rootless container: User namespaces enabled, but no 
group mapping found."
  srun: error: slm-worker: task 0: Exited with exit code 1

- crun
  [hoge@slm-master hoge]$ podman run centos echo "Hello World"
  srun: error: slm-worker: task 0: Exited with exit code 1
  mount `devpts` to `dev/pts`: Invalid argument

When we removed the linux.gidMappings configuration block (shown below) from
scrun.lua, runc still failed with a different error, but crun succeeded.

- scrun.lua (the linux.gidMappings configuration block)
... snip ...
  -- Provide default group map as root if one not provided
  -- mappings fail with build???
  if (true or config["linux"]["gidMappings"] == nil)
  then
      config["linux"]["gidMappings"] =
              {{containerID=process_group_id, hostID=math.floor(group_id), size=1}}
  end
... snip ...
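
(For reference, "removed" above means we simply disabled this assignment in
our local copy, roughly as follows:)

  -- if (true or config["linux"]["gidMappings"] == nil)
  -- then
  --     config["linux"]["gidMappings"] =
  --             {{containerID=process_group_id, hostID=math.floor(group_id), size=1}}
  -- end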

- runc
  [hoge@slm-master hoge]$ podman run centos echo "Hello World"
  time="2024-04-03T02:37:18Z" level=error msg="runc run failed: User namespaces 
enabled, but no gid mappings found."
  srun: error: slm-worker: task 0: Exited with exit code 1

- crun
  [hoge@slm-master hoge]$ podman run centos echo "Hello World"
  Hello World

Therefore, we would like to ask the following two questions:
  - Why does the container fail to run?
  - What configuration is required or recommended to run the container?
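
For what it's worth, one direction we are wondering about is whether the group
map should be built from the user's subordinate gid ranges in /etc/subgid
rather than from a single hard-coded entry. Below is a rough, untested sketch
of how slurm_scrun_stage_in might do that; the /etc/subgid parsing and the
choice of containerIDs are our own assumptions, and we do not know whether
scrun/runc would accept such a map:

  local function subgid_mappings(user, group_id, process_group_id)
      -- First entry: map the container's primary gid onto the caller's host
      -- gid, the same as the current scrun.lua behaviour.
      local mappings = {{containerID=process_group_id,
                         hostID=math.floor(group_id), size=1}}
      local next_id = process_group_id + 1

      -- Append the caller's subordinate gid ranges ("name:start:count").
      local f = io.open("/etc/subgid", "r")
      if f then
          for line in f:lines() do
              local name, start, count = line:match("^([^:]+):(%d+):(%d+)$")
              if name == user then
                  table.insert(mappings, {containerID=next_id,
                                          hostID=tonumber(start),
                                          size=tonumber(count)})
                  next_id = next_id + tonumber(count)
              end
          end
          f:close()
      end
      return mappings
  end

  -- usage inside slurm_scrun_stage_in, after 'user' has been resolved:
  -- config["linux"]["gidMappings"] = subgid_mappings(user, group_id, process_group_id)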

Best Regards,
Toshiki Sonoda
- oci.conf (crun)

EnvExclude="^(SLURM_CONF|SLURM_CONF_SERVER)="
RunTimeEnvExclude="^(SLURM_CONF|SLURM_CONF_SERVER)="
RunTimeQuery="crun --rootless=true --root=/run/user/%U/ state %n.%u.%j.%s.%t"
RunTimeKill="crun --rootless=true --root=/run/user/%U/ kill -a %n.%u.%j.%s.%t"
RunTimeDelete="crun --rootless=true --root=/run/user/%U/ delete --force 
%n.%u.%j.%s.%t"
RunTimeRun="crun --rootless=true --root=/run/user/%U/ run --bundle %b 
%n.%u.%j.%s.%t"

- oci.conf (runc)

EnvExclude="^(SLURM_CONF|SLURM_CONF_SERVER)="
RunTimeEnvExclude="^(SLURM_CONF|SLURM_CONF_SERVER)="
RunTimeQuery="runc --rootless=true --root=/run/user/%U/ state %n.%u.%j.%s.%t"
RunTimeKill="runc --rootless=true --root=/run/user/%U/ kill -a %n.%u.%j.%s.%t"
RunTimeDelete="runc --rootless=true --root=/run/user/%U/ delete --force 
%n.%u.%j.%s.%t"
RunTimeRun="runc --rootless=true --root=/run/user/%U/ run %n.%u.%j.%s.%t -b %b"

- scrun.lua

local json = require 'json'
local open = io.open

local scratch_path = "/export/nfs/"

local function read_file(path)
    local file = open(path, "rb")
    if not file then return nil end
    local content = file:read "*all"
    file:close()
    return content
end

local function write_file(path, contents)
    local file = open(path, "wb")
    if not file then return nil end
    file:write(contents)
    file:close()
    return
end

function slurm_scrun_stage_in(id, bundle, spool_dir, config_file, job_id, user_id, group_id, job_env)
    slurm.log_debug(string.format("stage_in(%s, %s, %s, %s, %d, %d, %d)",
                       id, bundle, spool_dir, config_file, job_id, user_id, group_id))
    local config = json.decode(read_file(config_file))
    local rootfs = config["root"]["path"]

    rc, user = slurm.allocator_command(string.format("id -un %d", user_id))
    user = string.gsub(user, "%s+", "")
    local root = scratch_path..user.."/scrun"
    local dstfs = root.."/containers/"..id.."/rootfs/"
    local dstconf = root.."/containers/"..id.."/config.json"
    local p = root.."/containers/"..id.."/"

    os.execute(string.format("/usr/bin/env mkdir -p %s", dstfs))
    os.execute(string.format("/usr/bin/env rsync --numeric-ids --delete-after 
--ignore-errors -a -- %s/ %s/", rootfs, dstfs))
    slurm.set_bundle_path(p)
    slurm.set_root_path(p.."rootfs")

    -- Always force user namespace support in container or runc will reject
    local process_user_id = 0
    local process_group_id = 0

    if ((config["process"] ~= nil) and (config["process"]["user"] ~= nil))
    then
        -- resolve out user in the container
        if (config["process"]["user"]["uid"] ~= nil)
        then
            process_user_id=config["process"]["user"]["uid"]
        else
            process_user_id=0
        end

        -- resolve out group in the container
        if (config["process"]["user"]["gid"] ~= nil)
        then
            process_group_id=config["process"]["user"]["gid"]
        else
            process_group_id=0
        end

        -- purge additionalGids as they are not supported in rootless
        if (config["process"]["user"]["additionalGids"] ~= nil)
        then
            config["process"]["user"]["additionalGids"] = nil
        end
    end

    if (config["linux"] ~= nil)
    then
        -- force user namespace to always be defined for rootless mode
        local found = false
        if (config["linux"]["namespaces"] == nil)
        then
            config["linux"]["namespaces"] = {}
        else
            for _, namespace in ipairs(config["linux"]["namespaces"]) do
               if (namespace["type"] == "user")
               then
                   found=true
                   break
               end
            end
        end
        if (found == false)
        then
            table.insert(config["linux"]["namespaces"], {type= "user"})
        end

        -- Provide default user map as root if one not provided
        if (true or config["linux"]["uidMappings"] == nil)
        then
            config["linux"]["uidMappings"] =
                    {{containerID=process_user_id, hostID=math.floor(user_id), size=1}}
        end

        -- Provide default group map as root if one not provided
        -- mappings fail with build???
        if (true or config["linux"]["gidMappings"] == nil)
        then
            config["linux"]["gidMappings"] =
                    {{containerID=process_group_id, hostID=math.floor(group_id), size=1}}
        end

        -- disable trying to use a specific cgroup
        config["linux"]["cgroupsPath"] = nil
    end

    -- Merge in Job environment into container -- this is optional!
    if (config["process"]["env"] == nil)
    then
        config["process"]["env"] = {}
    end
    for _, env in ipairs(job_env) do
        table.insert(config["process"]["env"], env)
    end

    -- Remove all prestart hooks to squash any networking attempts
    if ((config["hooks"] ~= nil) and (config["hooks"]["prestart"] ~= nil))
    then
        config["hooks"]["prestart"] = nil
    end

    -- Remove all rlimits
    if ((config["process"] ~= nil) and (config["process"]["rlimits"] ~= nil))
    then
        config["process"]["rlimits"] = nil
    end

    write_file(dstconf, json.encode(config))
    return slurm.SUCCESS
end

function slurm_scrun_stage_out(id, bundle, orig_bundle, root_path, orig_root_path, spool_dir, config_file, jobid, user_id, group_id)
    os.execute("rm --one-file-system --preserve-root=all -rf "..bundle)
    return slurm.SUCCESS
end

slurm.log_info("initialized scrun.lua")

return slurm.SUCCESS

- podman info

host:
  arch: amd64
  buildahVersion: 1.31.3
  cgroupControllers:
  - memory
  - pids
  cgroupManager: systemd
  cgroupVersion: v2
  conmon:
    package: conmon-2.1.8-1.el9.x86_64
    path: /usr/bin/conmon
    version: 'conmon version 2.1.8, commit: cebaba63f66de0e92cdc7e2a59f39c9208281158'
  cpuUtilization:
    idlePercent: 99.92
    systemPercent: 0.05
    userPercent: 0.03
  cpus: 2
  databaseBackend: boltdb
  distribution:
    distribution: '"rocky"'
    version: "9.3"
  eventLogger: journald
  freeLocks: 2010
  hostname: slm-master.novalocal
  idMappings:
    gidmap:
    - container_id: 0
      host_id: 3002
      size: 1
    - container_id: 1
      host_id: 231072
      size: 65536
    uidmap:
    - container_id: 0
      host_id: 3002
      size: 1
    - container_id: 1
      host_id: 231072
      size: 65536
  kernel: 5.14.0-362.24.1.el9_3.x86_64
  linkmode: dynamic
  logDriver: journald
  memFree: 375476224
  memTotal: 3835568128
  networkBackend: netavark
  networkBackendInfo:
    backend: netavark
    dns:
      package: aardvark-dns-1.7.0-1.el9.x86_64
      path: /usr/libexec/podman/aardvark-dns
      version: aardvark-dns 1.7.0
    package: netavark-1.7.0-2.el9_3.x86_64
    path: /usr/libexec/podman/netavark
    version: netavark 1.7.0
  ociRuntime:
    name: slurm
    package: slurm-23.11.5-1.el9.x86_64
    path: /usr/bin/scrun
    version: |-
      scrun version 23.11.5
      spec: 1.0.0
  os: linux
  pasta:
    executable: ""
    package: ""
    version: ""
  remoteSocket:
    path: /run/user/3002/podman/podman.sock
  security:
    apparmorEnabled: false
    capabilities: CAP_CHOWN,CAP_DAC_OVERRIDE,CAP_FOWNER,CAP_FSETID,CAP_KILL,CAP_NET_BIND_SERVICE,CAP_SETFCAP,CAP_SETGID,CAP_SETPCAP,CAP_SETUID,CAP_SYS_CHROOT
    rootless: true
    seccompEnabled: true
    seccompProfilePath: /usr/share/containers/seccomp.json
    selinuxEnabled: false
  serviceIsRemote: false
  slirp4netns:
    executable: /usr/bin/slirp4netns
    package: slirp4netns-1.2.1-1.el9.x86_64
    version: |-
      slirp4netns version 1.2.1
      commit: 09e31e92fa3d2a1d3ca261adaeb012c8d75a8194
      libslirp: 4.4.0
      SLIRP_CONFIG_VERSION_MAX: 3
      libseccomp: 2.5.2
  swapFree: 0
  swapTotal: 0
  uptime: 116h 30m 51.00s (Approximately 4.83 days)
plugins:
  authorization: null
  log:
  - k8s-file
  - none
  - passthrough
  - journald
  network:
  - bridge
  - macvlan
  - ipvlan
  volume:
  - local
registries:
  search:
  - registry.access.redhat.com
  - registry.redhat.io
  - docker.io
store:
  configFile: /export/nfs/hoge/.config/containers/storage.conf
  containerStore:
    number: 32
    paused: 0
    running: 0
    stopped: 32
  graphDriverName: vfs
  graphOptions:
    vfs.ignore_chown_errors: "true"
  graphRoot: /export/nfs/hoge/containers
  graphRootAllocated: 31042023424
  graphRootUsed: 6336229376
  graphStatus: {}
  imageCopyTmpDir: /var/tmp
  imageStore:
    number: 3
  runRoot: /export/nfs/hoge/containers
  transientStore: false
  volumePath: /export/nfs/hoge/containers/volumes
version:
  APIVersion: 4.6.1
  Built: 1709719721
  BuiltTime: Wed Mar  6 10:08:41 2024
  GitCommit: ""
  GoVersion: go1.20.12
  Os: linux
  OsArch: linux/amd64
  Version: 4.6.1

- storage.conf

[storage]
driver = "vfs"
runroot = "$HOME/containers"
graphroot = "$HOME/containers"

[storage.options]
pull_options = {use_hard_links = "true", enable_partial_images = "true"}

[storage.options.vfs]
ignore_chown_errors = "true"

- containers.conf

[containers]
apparmor_profile = "unconfined"
cgroupns = "host"
cgroups = "enabled"
default_sysctls = []
label = false
netns = "host"
no_hosts = true
pidns = "host"
utsns = "host"
userns = "host"

[engine]
cgroup_manager = "systemd"
runtime = "slurm"
runtime_supports_nocgroups = [ "slurm" ]
runtime_supports_json = [ "slurm" ]
remote = false

[engine.runtimes]
slurm = [ "/usr/bin/scrun" ]

- /etc/os-release

NAME="Rocky Linux"
VERSION="9.3 (Blue Onyx)"
ID="rocky"
ID_LIKE="rhel centos fedora"
VERSION_ID="9.3"
PLATFORM_ID="platform:el9"
PRETTY_NAME="Rocky Linux 9.3 (Blue Onyx)"
ANSI_COLOR="0;32"
LOGO="fedora-logo-icon"
CPE_NAME="cpe:/o:rocky:rocky:9::baseos"
HOME_URL="https://rockylinux.org/";
BUG_REPORT_URL="https://bugs.rockylinux.org/";
SUPPORT_END="2032-05-31"
ROCKY_SUPPORT_PRODUCT="Rocky-Linux-9"
ROCKY_SUPPORT_PRODUCT_VERSION="9.3"
REDHAT_SUPPORT_PRODUCT="Rocky Linux"
REDHAT_SUPPORT_PRODUCT_VERSION="9.3"