Hi supers. I am configuring a server with Slurm and cgroups. This server will be the only Slurm node, acting as both the head node and the compute node at the same time. In order to force users to submit Slurm jobs instead of running processes directly on the server, I would like to use cgroups to isolate the last 8 CPUs in a cpuset reserved for interactive user processes (as if those CPUs were the head node). The remaining CPUs can then be used by any Slurm job.
I followed instructions from several sites on the internet, but the final configuration still does not do what I want. Processes started by normal users are indeed confined to the last 8 CPUs, but so are the Slurm jobs they submit. It seems that since jobs run as the normal users (not as the slurm user), they are also constrained by the cgroup rules. Is it possible to achieve what I want? Here are my configuration files:

========================================================================
/etc/cgrules.conf
========================================================================
# <user>   <controllers>       <destination>
root       cpu,cpuset,memory   /
slurm      cpu,cpuset,memory   /
*          cpu,cpuset,memory   interactive

========================================================================
/etc/cgconfig.conf
========================================================================
group interactive {
    cpu {
        cpu.shares = 100;
    }
    cpuset {
        cpuset.cpus = 216-223;
        cpuset.cpu_exclusive = 1;
        cpuset.mem_exclusive = 1;
        cpuset.mem_hardwall = 1;
        cpuset.memory_migrate = 0;
        cpuset.memory_spread_page = 0;
        cpuset.memory_spread_slab = 0;
        cpuset.mems = 0;
        cpuset.sched_load_balance = 0;
        cpuset.sched_relax_domain_level = -1;
    }
    memory {
        memory.limit_in_bytes = 8G;
        memory.swappiness = 41;
        memory.memsw.limit_in_bytes = 8G;
    }
}

========================================================================
slurm.conf
========================================================================
ControlMachine=vital
ControlAddr=172.25.2.25
AuthType=auth/munge
CryptoType=crypto/munge
GresTypes=gpu
MaxTasksPerNode=216
MpiDefault=none
ProctrackType=proctrack/cgroup
ReturnToService=1
SlurmctldPidFile=/var/run/slurm-llnl/slurmctld.pid
SlurmctldPort=6817
SlurmdPidFile=/var/run/slurm-llnl/slurmd.pid
SlurmdPort=6818
SlurmdSpoolDir=/var/spool/slurmd
SlurmUser=slurm
StateSaveLocation=/var/spool/slurm-llnl
SwitchType=switch/none
TaskPlugin=task/cgroup
TaskPluginParam=sched
InactiveLimit=0
KillWait=30
MinJobAge=300
SlurmctldTimeout=120
SlurmdTimeout=300
Waittime=0
DefMemPerNode=998749
FastSchedule=1
SchedulerType=sched/backfill
SelectType=select/cons_res
SelectTypeParameters=CR_CPU_Memory
AccountingStorageHost=vital
AccountingStorageLoc=slurm_acct_db
AccountingStoragePass=/var/run/munge/munge.socket.2
AccountingStoragePort=6819
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageUser=slurm
AccountingStoreJobComment=YES
ClusterName=bioinfo
JobCompHost=vital
JobCompLoc=slurm_acct_db
JobCompPass=aikeeCu4S
JobCompPort=6819
JobCompType=jobcomp/slurmdbd
JobCompUser=slurm
JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/cgroup
SlurmctldDebug=verbose
SlurmdDebug=verbose
BurstBufferType=burst_buffer/generic
NodeName=vital NodeAddr=172.25.2.25 CPUs=224 RealMemory=1031517 Sockets=4 CoresPerSocket=28 ThreadsPerCore=2 State=UNKNOWN Gres=gpu:1 MemSpecLimit=32768
PartitionName=batch Nodes=vital OverSubscribe=YES Default=YES MaxTime=INFINITE State=UP

========================================================================
cgroup.conf
========================================================================
CgroupMountpoint="/sys/fs/cgroup"
CgroupAutomount=yes
AllowedRAMSpace=100
AllowedSwapSpace=0
ConstrainCores=no
ConstrainDevices=yes
ConstrainKmemSpace=no
ConstrainRAMSpace=no
ConstrainSwapSpace=no
MaxRAMPercent=100
MaxSwapPercent=100
TaskAffinity=no

Thanks in advance for any help.

--
David da Silva Pires