The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh9-5.14.0-4.vz9.10.12 ------> commit 7b72da674e3043f2f6a1e43db04d6d23ec648fbe Author: Andrey Ryabinin <ryabinin....@gmail.com> Date: Wed Oct 20 11:39:25 2021 +0300
ve/page_alloc, kstat: account allocation latencies per-task and per-thread Vstorage wants per-process allocation latencies: - total accumulated latency (total time spent inside the kernel allocator) - total alloc attempts (so that average latency can be calculated) This adds a /proc/<pid>/vz_latency file which outputs the numbers: Type Total_lat Calls allocatomic: 0 1334 alloc: 8000000 36643 allocmp: 0 919 https://jira.sw.ru/browse/PSBM-81395 Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com> Cc: Pavel Borzenkov <pborzen...@virtuozzo.com> (cherry-picked from vz7 commit 6d9a9210395e ("ve/page_alloc, kstat: account allocation latencies per-task")) Signed-off-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com> +++ ve/kstat/alloc_lat: Initialize alloc_lat to zero at start It seems that 'struct task_struct' is not initialized to zero after allocation. Thus we need to initialize alloc_lat explicitly. https://jira.sw.ru/browse/PSBM-81395 Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com> Reviewed-by: Kirill Tkhai <ktk...@virtuozzo.com> (cherry-picked from vz7 commit 82ddc4c43f2d ("ve/kstat/alloc_lat: Initialize alloc_lat to zero at start")) Signed-off-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com> +++ ve/fs/proc: Make per-thread and per-process allocation latencies. Follow-up for 6d9a9210395e ("ve/page_alloc, kstat: account allocation latencies per-task") Provide both per-thread and per-process allocation latencies: - /proc/<pid>/vz_latency - cumulative for a thread group - /proc/<pid>/task/<tid>/vz_latency - thread-specific During allocation we collect per-thread latency. When a thread dies, it adds its own latencies to the shared task->signal->alloc_lat struct. /proc/<pid>/vz_latency sums the allocation latencies over all live threads plus the latencies of already dead threads from task->signal->alloc_lat.
https://jira.sw.ru/browse/PSBM-81395 Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com> Reviewed-by: Kirill Tkhai <ktk...@virtuozzo.com> Cc: Pavel Borzenkov <pavel.borzen...@acronis.com> Rebase to vz8: - As signal_struct moved from sched.h to sched/signal.h, the changes moved with it (cherry-picked from vz7 commit c4cb66d5e706 ("ve/fs/proc: Make per-thread and per-process allocation latencies.")) Signed-off-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com> +++ vz_latency: don't account allocations in interrupts to random tasks When we are in an interrupt, 'current' is just some random task. We shouldn't account per-task atomic allocation latency to random tasks. Use the in_task() macro to identify task context, and account per-task latency only if we are in task context. https://jira.sw.ru/browse/PSBM-87797 Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com> Reviewed-by: Denis V. Lunev <d...@openvz.org> (cherry-picked from vz7 commit 3ed23cb6c686 ("vz_latency: don't account allocations in interrupts to random tasks")) Signed-off-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com> (cherry-picked from vz8 commit 374a4f030d35 ("ve/page_alloc, kstat: account allocation latencies per-task and per-thread")) Signed-off-by: Nikita Yushchenko <nikita.yushche...@virtuozzo.com> --- fs/proc/base.c | 79 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 5 +++ include/linux/sched/signal.h | 3 ++ kernel/exit.c | 16 +++++++++ kernel/fork.c | 4 +++ mm/page_alloc.c | 5 +++ 6 files changed, 112 insertions(+) diff --git a/fs/proc/base.c b/fs/proc/base.c index a02acdaa3ca5..f395ec139587 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -56,6 +56,7 @@ #include <linux/stat.h> #include <linux/task_io_accounting_ops.h> #include <linux/init.h> +#include <linux/kstat.h> #include <linux/capability.h> #include <linux/file.h> #include <linux/fdtable.h> @@ -544,6 +545,78 @@ static const struct file_operations proc_lstats_operations = { #endif +#ifdef CONFIG_VE +static void 
lastlat_seq_show(struct seq_file *m, + const char *name, + struct kstat_lat_snap_struct *snap) +{ + seq_printf(m, "%-12s %20Lu %20lu\n", name, + snap->totlat, snap->count); +} +static const char *alloc_descr[] = { + "allocatomic:", + "alloc:", + "allocmp:", +}; +static const int alloc_types[] = { + KSTAT_ALLOCSTAT_ATOMIC, + KSTAT_ALLOCSTAT_LOW, + KSTAT_ALLOCSTAT_LOW_MP, +}; + +static int proc_tid_vz_lat(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) +{ + int i; + + seq_printf(m, "%-12s %20s %20s\n", + "Type", "Total_lat", "Calls"); + + for (i = 0; i < ARRAY_SIZE(alloc_types); i++) + lastlat_seq_show(m, alloc_descr[i], + &task->alloc_lat[alloc_types[i]]); + return 0; +} + +static int proc_tgid_vz_lat(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) +{ + int i; + unsigned long flags; + u64 lat[ARRAY_SIZE(alloc_types)]; + u64 count[ARRAY_SIZE(alloc_types)]; + + for (i = 0; i < ARRAY_SIZE(alloc_types); i++) { + lat[i] = task->alloc_lat[alloc_types[i]].totlat; + count[i] = task->alloc_lat[alloc_types[i]].count; + } + + if (lock_task_sighand(task, &flags)) { + struct task_struct *t = task; + while_each_thread(task, t) { + for (i = 0; i < ARRAY_SIZE(alloc_types); i++) { + lat[i] += t->alloc_lat[alloc_types[i]].totlat; + count[i] += t->alloc_lat[alloc_types[i]].count; + } + } + for (i = 0; i < ARRAY_SIZE(alloc_types); i++) { + lat[i] += t->signal->alloc_lat[alloc_types[i]].totlat; + count[i] += t->signal->alloc_lat[alloc_types[i]].count; + } + unlock_task_sighand(task, &flags); + } + + seq_printf(m, "%-12s %20s %20s\n", + "Type", "Total_lat", "Calls"); + + for (i = 0; i < ARRAY_SIZE(alloc_types); i++) + seq_printf(m, "%-12s %20Lu %20Lu\n", alloc_descr[i], + lat[i], count[i]); + + return 0; +} +#endif + static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -3331,6 +3404,9 @@ static const struct pid_entry tgid_base_stuff[] = 
{ #ifdef CONFIG_SECCOMP_CACHE_DEBUG ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache), #endif +#ifdef CONFIG_VE + ONE("vz_latency", S_IRUGO, proc_tgid_vz_lat), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) @@ -3664,6 +3740,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_SECCOMP_CACHE_DEBUG ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache), #endif +#ifdef CONFIG_VE + ONE("vz_latency", S_IRUGO, proc_tid_vz_lat), +#endif }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/include/linux/sched.h b/include/linux/sched.h index 332c36a8f4c4..e0a562d76aa1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -35,6 +35,7 @@ #include <linux/seqlock.h> #include <linux/kcsan.h> #include <asm/kmap_size.h> +#include <linux/kstat.h> /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; @@ -1242,6 +1243,10 @@ struct task_struct { struct tlbflush_unmap_batch tlb_ubc; +#ifdef CONFIG_VE + struct kstat_lat_snap_struct alloc_lat[KSTAT_ALLOCSTAT_NR]; +#endif + union { refcount_t rcu_users; struct rcu_head rcu; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index b9126fe06c3f..b48fba3fc9dd 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -208,6 +208,9 @@ struct signal_struct { #ifdef CONFIG_TASKSTATS struct taskstats *stats; #endif +#ifdef CONFIG_VE + struct kstat_lat_snap_struct alloc_lat[KSTAT_ALLOCSTAT_NR]; +#endif #ifdef CONFIG_AUDIT unsigned audit_tty; struct tty_audit_buf *tty_audit_buf; diff --git a/kernel/exit.c b/kernel/exit.c index d7b901c6b07a..5a0607ee41b1 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -750,6 +750,20 @@ static void check_stack_usage(void) static inline void check_stack_usage(void) {} #endif +void kstat_add_dying(struct task_struct *tsk) +{ +#ifdef CONFIG_VE + int i; + + spin_lock_irq(&tsk->sighand->siglock); + for (i = 0; i < 
KSTAT_ALLOCSTAT_NR; i++) { + tsk->signal->alloc_lat[i].totlat += tsk->alloc_lat[i].totlat; + tsk->signal->alloc_lat[i].count += tsk->alloc_lat[i].count; + } + spin_unlock_irq(&tsk->sighand->siglock); +#endif +} + void __noreturn do_exit(long code) { struct task_struct *tsk = current; @@ -826,6 +840,8 @@ void __noreturn do_exit(long code) #endif if (tsk->mm) setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm); + } else { + kstat_add_dying(tsk); } acct_collect(code, group_dead); if (group_dead) diff --git a/kernel/fork.c b/kernel/fork.c index 61adb6409f0d..7109198797ac 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -928,6 +928,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) if (orig->cpus_ptr == &orig->cpus_mask) tsk->cpus_ptr = &tsk->cpus_mask; +#ifdef CONFIG_VE + memset(tsk->alloc_lat, 0, sizeof(tsk->alloc_lat)); +#endif + /* * One for the user space visible state that goes away when reaped. * One for the scheduler. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3fa186ba631f..ce726b40fcf6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5428,6 +5428,11 @@ static void __alloc_collect_stats(gfp_t gfp_mask, unsigned int order, cpu = smp_processor_id(); KSTAT_LAT_PCPU_ADD(&kstat_glob.alloc_lat[ind], delta); + if (in_task()) { + current->alloc_lat[ind].totlat += delta; + current->alloc_lat[ind].count++; + } + if (!page) kstat_glob.alloc_fails[cpu][ind]++; local_irq_restore(flags); _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel