Hey All,

I was just hoping to find out if anyone can explain how a job running on a single node was able to have a memory efficiency (MaxRSS vs. allocation) of 240% reported by seff. Below are the specifics of the job that was run. We're using Slurm 19.05.7 on CentOS 8.2.

[root@hpc-node01 ~]# scontrol show jobid -dd 97036
JobId=97036 JobName=jobbie.sh
   UserId=username(012344321) GroupId=domain users(214400513) MCS_label=N/A
   Priority=4294842062 Nice=0 Account=(null) QOS=normal
   JobState=COMPLETED Reason=None Dependency=(null)
   Requeue=1 Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0
   DerivedExitCode=0:0
   RunTime=00:22:35 TimeLimit=8-08:00:00 TimeMin=N/A
   SubmitTime=2022-12-02T14:27:47 EligibleTime=2022-12-02T14:27:48
   AccrueTime=2022-12-02T14:27:48
   StartTime=2022-12-02T14:27:48 EndTime=2022-12-02T14:50:23 Deadline=N/A
   SuspendTime=None SecsPreSuspend=0 LastSchedEval=2022-12-02T14:27:48
   Partition=defq AllocNode:Sid=hpc-node01:3921213
   ReqNodeList=(null) ExcNodeList=(null)
   NodeList=hpc-node11
   BatchHost=hpc-node11
   NumNodes=1 NumCPUs=40 NumTasks=0 CPUs/Task=40 ReqB:S:C:T=0:0:*:*
   TRES=cpu=40,mem=350G,node=1,billing=40
   Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
   Nodes=hpc-node11 CPU_IDs=0-79 Mem=358400 GRES=
   MinCPUsNode=40 MinMemoryNode=350G MinTmpDiskNode=0
   Features=(null) DelayBoot=00:00:00
   OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)
   Command=/path/to/scratch/jobbie.sh
   WorkDir=/path/to/scratch
   StdErr=/path/to/scratch/jobbie.sh-97036.error
   StdIn=/dev/null
   StdOut=/path/to/scratch/jobbie.sh-97036.out
   Power=
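For reference, this is the sacct query I would use to pull the raw per-step accounting that seff summarizes, in case the per-step MaxRSS/NTasks breakdown makes the 840 GB figure below easier to explain. These are all standard sacct format fields; I haven't pasted its output here.

# Cross-check (not run here): raw per-step accounting behind seff's numbers
sacct -j 97036 --units=G \
      --format=JobID,JobName,NTasks,NNodes,MaxRSS,AveRSS,TotalCPU,Elapsed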
[root@hpc-node01 ~]# seff 97036
Job ID: 97036
Cluster: slurm
Use of uninitialized value $user in concatenation (.) or string at /cm/shared/apps/slurm/current/bin/seff line 154, <DATA> line 604.
User/Group: /domain users
State: COMPLETED (exit code 0)
Nodes: 1
Cores per node: 40
CPU Utilized: 04:43:36
CPU Efficiency: 31.39% of 15:03:20 core-walltime
Job Wall-clock time: 00:22:35
Memory Utilized: 840.21 GB
Memory Efficiency: 240.06% of 350.00 GB

[root@hpc-node11 ~]# free -m
              total        used        free      shared  buff/cache   available
Mem:         385587        2761      268544        7891      114281      371865
Swap:             0           0           0

[root@hpc-node11 ~]# cat /path/to/scratch/jobbie.sh
#!/bin/bash
#SBATCH --mail-user=usern...@bcchr.ca
#SBATCH --mail-type=ALL

## CPU Usage
#SBATCH --mem=350G
#SBATCH --cpus-per-task=40
#SBATCH --time=200:00:00
#SBATCH --nodes=1

## Output and Stderr
#SBATCH --output=%x-%j.out
#SBATCH --error=%x-%j.error

source /path/to/tools/Miniconda3/opt/miniconda3/etc/profile.d/conda.sh
conda activate nanomethphase

# Working dir
Working_Dir=/path/to/scratch/

# Case methyl sample
Case=$Working_Dir/Case/methylation_frequency.tsv

# Control, here using a dir with both parents
Control=$Working_Dir/Control/

# DMA call
/path/to/tools/NanoMethPhase/NanoMethPhase/nanomethphase.py dma \
    --case $Case \
    --control $Control \
    --columns 1,2,5,7 \
    --out_prefix DH0808_Proband_vs_Controls_DMA \
    --out_dir $Working_Dir

Daryl Roche