Hello ~
Please help me.

Total GPUs : 4
large QoS  : max 3 GPUs
base QoS   : max 2 GPUs

I have four GPUs in total. When a job in the large QoS is using three GPUs and a job in the base QoS is then submitted, I want the large QoS job to keep running for the grace period before it is canceled and the base QoS job starts. However, as soon as the base QoS job is submitted, the large QoS job is canceled immediately, with no waiting time at all, even though slurmctld logs a grace time:

[2023-11-02T11:37:36.589] debug: setting 3600 sec preemption grace time for JobId=153 to reclaim resources for JobId=154

Could you help me understand what might be going wrong? Here are my Slurm configuration details. If you need any more information, please feel free to reply at any time.

*### /etc/slurm/slurm.conf ###*

# cat /etc/slurm/slurm.conf
# slurm.conf file generated by configurator.html.
# Put this file on all nodes of your cluster.
# See the slurm.conf man page for more information.
#
# Global Configuration
ClusterName=cluster
SlurmctldHost=master01
SlurmUser=slurm
GresTypes=gpu
JobRequeue=1
ProctrackType=proctrack/cgroup
ReturnToService=2
StateSaveLocation=/NFS/slurm/ctld
SwitchType=switch/none
TaskPlugin=task/cgroup,task/affinity

# SLURMCTLD
SlurmctldPidFile=/var/spool/slurm/slurmctld.pid
SlurmctldLogFile=/var/log/slurm/slurmctld.log
SlurmctldTimeout=30
SlurmctldDebug=debug5

# SLURMD
SlurmdLogFile=/var/log/slurm/slurmd.log
SlurmdPidFile=/var/spool/slurm/slurmd.pid
SlurmdSpoolDir=/var/spool/slurm/
SlurmdTimeout=30
SlurmdDebug=debug5

# SCHEDULING
SchedulerType=sched/backfill

# JOB PRIORITY
PriorityType=priority/multifactor
PriorityWeightQOS=10000

# Select Resource
SelectType=select/cons_tres
SelectTypeParameters=CR_CPU

# Job
JobAcctGatherType=jobacct_gather/cgroup
JobCompUser=slurm
JobCompType=jobcomp/filetxt
JobCompLoc=/NFS/slurm/job-comp/slurm_jobcomp.log
MinJobAge=3600

# Account
AccountingStoreFlags=job_comment
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost=master01
AccountingStoragePass=/var/run/munge/munge.socket.2
AccountingStorageUser=slurm
AccountingStorageTRES=gres/gpu
AccountingStorageEnforce=limits,qos

# COMPUTE NODES
NodeName=compute01 CPUs=8 Boards=1 SocketsPerBoard=1 CoresPerSocket=8 ThreadsPerCore=1 RealMemory=15731 State=UNKNOWN
NodeName=compute02 CPUs=8 Boards=1 SocketsPerBoard=1 CoresPerSocket=8 ThreadsPerCore=1 RealMemory=7679 State=UNKNOWN
NodeName=compute03 CPUs=8 Boards=1 SocketsPerBoard=1 CoresPerSocket=8 ThreadsPerCore=1 RealMemory=7679 State=UNKNOWN
PartitionName=cpu Nodes=compute0[1-3] Default=NO MaxTime=INFINITE State=UP

NodeName=gpu01 Gres=gpu:2 CPUs=8 Boards=1 SocketsPerBoard=8 CoresPerSocket=1 ThreadsPerCore=1 RealMemory=15731
NodeName=gpu02 Gres=gpu:1 CPUs=8 Boards=1 SocketsPerBoard=8 CoresPerSocket=1 ThreadsPerCore=1 RealMemory=15731
NodeName=gpu03 Gres=gpu:1 CPUs=8 Boards=1 SocketsPerBoard=8 CoresPerSocket=1 ThreadsPerCore=1 RealMemory=15731
PartitionName=gpu Nodes=gpu0[1-3] Default=YES MaxTime=INFINITE State=UP OverSubscribe=FORCE:4

# Preemption
PreemptMode=CANCEL
PreemptType=preempt/qos
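For context on what I expected: my understanding from the preemption documentation is that, with PreemptMode=CANCEL plus a QoS GraceTime, the preempted job's processes are sent SIGCONT and SIGTERM as soon as the job is selected for preemption, and only receive SIGKILL once the grace time expires. So one thing I still want to rule out is whether gpu_burn simply exits on that first SIGTERM, which would make the job look like it was canceled with no waiting time. Below is the probe script I plan to use; it is only a sketch (the job name and the trap-based loop are mine, not part of my current setup):

suser01 $ cat grace-time-probe.sh
#!/bin/bash -l
#SBATCH -J grace-time-probe
#SBATCH -G 3
#SBATCH -q large

# Log the preemption notice instead of exiting, so the batch step
# survives the initial SIGTERM and should keep running until the
# grace time is up (and SIGKILL arrives).
trap 'echo "$(date) received SIGTERM (preemption notice)"' TERM

# Keep the allocation busy without srun, so the trap in this shell
# sees the signal directly.
while true; do
    echo "$(date) still running"
    sleep 10
done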
*### Slurmdbd ###*

# sacctmgr show qos   (empty columns trimmed; the GPU limit column is GrpTRES)

      Name   Priority  GraceTime    Preempt PreemptMode UsageFactor    GrpTRES
---------- ---------- ---------- ---------- ----------- ----------- ----------
    normal          0   00:00:00               cluster     1.000000
      base       1000   00:00:00      large    cluster     1.000000 gres/gpu=2
     large        100   01:00:00               cluster     1.000000 gres/gpu=3
     small        500   00:00:00               cluster     1.000000 gres/gpu=2

# sacctmgr show assoc   (empty columns trimmed)

   Cluster    Account       User     Share                  QOS   Def QOS
---------- ---------- ---------- --------- -------------------- ---------
   cluster       root                    1
   cluster       root       root         1
   cluster    suser01                    1
   cluster    suser01    suser01         1     base,large,small      base
   cluster    suser02                    1
   cluster    suser02    suser02         1           base,large      base
   cluster    suser03                    1
   cluster    suser03    suser03         1           base,large      base
   cluster    suser04                    1
   cluster    suser04    suser04         1           base,large      base
   cluster      susol                    1
   cluster      susol      susol         1

*### Sample Job ###*

suser01 $ cat 4-suser01-large-qos-srun_gpu-burn.sh
#!/bin/bash -l
#SBATCH -J 4-suser01-large-qos-srun_gpu-burn.sh
#SBATCH -G 3
#SBATCH -q large

cd /NFS/gpu-burn
srun ./gpu_burn -d 120

suser01 $ cat 4-suser01-base-qos-srun_gpu-burn.sh
#!/bin/bash -l
#SBATCH -J 4-suser01-base-qos-srun_gpu-burn
#SBATCH -G 2

cd /NFS/gpu-burn
srun ./gpu_burn -d 120
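For completeness, this is how I reproduce the problem. The sbatch lines are exactly what I run (the job IDs match the slurmctld log above); the squeue/sacct lines are just the checks I use to watch the result, with nothing special in the format strings:

suser01 $ sbatch 4-suser01-large-qos-srun_gpu-burn.sh   # becomes JobId=153, takes 3 GPUs
suser01 $ sbatch 4-suser01-base-qos-srun_gpu-burn.sh    # becomes JobId=154
suser01 $ squeue -o "%.6i %.8q %.9T %.11M %b"           # 153 disappears immediately
suser01 $ sacct -j 153 -o JobID,State,Elapsed           # final state and elapsed time of the large job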