[Devel] [PATCH RHEL9 COMMIT] ve/page_alloc, kstat: account allocation latencies per-task and per-thread

Konstantin Khorenko Wed, 20 Oct 2021 05:41:52 -0700

The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at 
https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-4.vz9.10.12
------>
commit 7b72da674e3043f2f6a1e43db04d6d23ec648fbe
Author: Andrey Ryabinin <ryabinin....@gmail.com>
Date:   Wed Oct 20 11:39:25 2021 +0300


    ve/page_alloc, kstat: account allocation latencies per-task and per-thread
    
    Vstorage wants per-process allocation latencies:
    
     - total accumulated latency (total time spent inside the kernel allocator)
     - total alloc attempts (so that average latency can be calculated)
    
    This adds /proc/<pid>/vz_latency file which outputs the numbers:
    
    Type                   Total_lat                Calls
    allocatomic:                    0                 1334
    alloc:                    8000000                36643
    allocmp:                        0                  919
    
    https://jira.sw.ru/browse/PSBM-81395
    Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
    
    Cc: Pavel Borzenkov <pborzen...@virtuozzo.com>
    
    (cherry-picked from vz7 commit 6d9a9210395e ("ve/page_alloc, kstat: account
    allocation latencies per-task"))
    
    Signed-off-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com>
    
    +++
    ve/kstat/alloc_lat: Initialize alloc_lat to zero at start
    
    It seems that 'struct task_struct' is not initialized to zero after
    allocation. Thus we need to initialize alloc_lat explicitly.
    
    https://jira.sw.ru/browse/PSBM-81395
    Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
    
    Reviewed-by: Kirill Tkhai <ktk...@virtuozzo.com>
    
    (cherry-picked from vz7 commit 82ddc4c43f2d ("ve/kstat/alloc_lat: Initialize
    alloc_lat to zero at start"))
    
    Signed-off-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com>
    
    +++
    ve/fs/proc: Make per-thread and per-process allocation latencies.
    
    Follow-up for 6d9a9210395e ("ve/page_alloc, kstat: account allocation
    latencies per-task")
    Make per-thread and per-process allocation latencies:
    
      - /proc/<pid>/vz_latency - cumulative for a thread group
      - /proc/<pid>/task/<tid>/vz_latency - thread-specific
    
    During allocation we collect per-thread latency. When a thread dies,
    it adds its own latencies to the shared task->signal->alloc_lat array.
    /proc/<pid>/vz_latency sums the allocation latencies of all live threads
    plus the latencies of already dead threads from task->signal->alloc_lat.
    
    https://jira.sw.ru/browse/PSBM-81395
    Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
    
    Reviewed-by: Kirill Tkhai <ktk...@virtuozzo.com>
    Cc: Pavel Borzenkov <pavel.borzen...@acronis.com>
    
    Rebase to vz8:
     - As signal_struct moved from sched.h to sched/signal.h, so did the changes
    
    (cherry-picked from vz7 commit c4cb66d5e706 ("ve/fs/proc: Make per-thread
    and per-process allocation latencies."))
    
    Signed-off-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com>
    
    +++
    vz_latency: don't account allocations in interrupts to random tasks
    
    When we are in an interrupt, 'current' is just a random task. We
    shouldn't account per-task atomic allocation latency to random tasks.
    Use the in_task() macro to identify task context, and account per-task
    latency iff we are in task context.
    
    https://jira.sw.ru/browse/PSBM-87797
    
    Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
    
    Reviewed-by: Denis V. Lunev <d...@openvz.org>
    
    (cherry-picked from vz7 commit 3ed23cb6c686 ("vz_latency: don't account
    allocations in interrupts to random tasks"))
    
    Signed-off-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com>
    
    (cherry-picked from vz8 commit 374a4f030d35 ("ve/page_alloc, kstat: account
    allocation latencies per-task and per-thread"))
    
    Signed-off-by: Nikita Yushchenko <nikita.yushche...@virtuozzo.com>
---
 fs/proc/base.c               | 79 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/sched.h        |  5 +++
 include/linux/sched/signal.h |  3 ++
 kernel/exit.c                | 16 +++++++++
 kernel/fork.c                |  4 +++
 mm/page_alloc.c              |  5 +++
 6 files changed, 112 insertions(+)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index a02acdaa3ca5..f395ec139587 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -56,6 +56,7 @@
 #include <linux/stat.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/init.h>
+#include <linux/kstat.h>
 #include <linux/capability.h>
 #include <linux/file.h>
 #include <linux/fdtable.h>
@@ -544,6 +545,78 @@ static const struct file_operations proc_lstats_operations = {
 
 #endif
 
+#ifdef CONFIG_VE
+static void lastlat_seq_show(struct seq_file *m,
+               const char *name,
+               struct kstat_lat_snap_struct *snap)
+{
+       seq_printf(m, "%-12s %20Lu %20lu\n", name,
+                       snap->totlat, snap->count);
+}
+static const char *alloc_descr[] = {
+       "allocatomic:",
+       "alloc:",
+       "allocmp:",
+};
+static const int alloc_types[] = {
+       KSTAT_ALLOCSTAT_ATOMIC,
+       KSTAT_ALLOCSTAT_LOW,
+       KSTAT_ALLOCSTAT_LOW_MP,
+};
+
+static int proc_tid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
+                       struct pid *pid, struct task_struct *task)
+{
+       int i;
+
+       seq_printf(m, "%-12s %20s %20s\n",
+                       "Type", "Total_lat", "Calls");
+
+       for (i = 0; i < ARRAY_SIZE(alloc_types); i++)
+               lastlat_seq_show(m, alloc_descr[i],
+                               &task->alloc_lat[alloc_types[i]]);
+       return 0;
+}
+
+static int proc_tgid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
+                       struct pid *pid, struct task_struct *task)
+{
+       int i;
+       unsigned long flags;
+       u64 lat[ARRAY_SIZE(alloc_types)];
+       u64 count[ARRAY_SIZE(alloc_types)];
+
+       for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
+               lat[i] = task->alloc_lat[alloc_types[i]].totlat;
+               count[i] = task->alloc_lat[alloc_types[i]].count;
+       }
+
+       if (lock_task_sighand(task, &flags)) {
+               struct task_struct *t = task;
+               while_each_thread(task, t) {
+                       for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
+                               lat[i] += t->alloc_lat[alloc_types[i]].totlat;
+                               count[i] += t->alloc_lat[alloc_types[i]].count;
+                       }
+               }
+               for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
+                       lat[i] += t->signal->alloc_lat[alloc_types[i]].totlat;
+                       count[i] += t->signal->alloc_lat[alloc_types[i]].count;
+               }
+               unlock_task_sighand(task, &flags);
+       }
+
+       seq_printf(m, "%-12s %20s %20s\n",
+                       "Type", "Total_lat", "Calls");
+
+       for (i = 0; i < ARRAY_SIZE(alloc_types); i++)
+               seq_printf(m, "%-12s %20Lu %20Lu\n", alloc_descr[i],
+                       lat[i], count[i]);
+
+       return 0;
+}
+#endif
+
 static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
                          struct pid *pid, struct task_struct *task)
 {
@@ -3331,6 +3404,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_SECCOMP_CACHE_DEBUG
        ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
 #endif
+#ifdef CONFIG_VE
+       ONE("vz_latency", S_IRUGO, proc_tgid_vz_lat),
+#endif
 };
 
 static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3664,6 +3740,9 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_SECCOMP_CACHE_DEBUG
        ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
 #endif
+#ifdef CONFIG_VE
+       ONE("vz_latency", S_IRUGO, proc_tid_vz_lat),
+#endif
 };
 
 static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 332c36a8f4c4..e0a562d76aa1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -35,6 +35,7 @@
 #include <linux/seqlock.h>
 #include <linux/kcsan.h>
 #include <asm/kmap_size.h>
+#include <linux/kstat.h>
 
 /* task_struct member predeclarations (sorted alphabetically): */
 struct audit_context;
@@ -1242,6 +1243,10 @@ struct task_struct {
 
        struct tlbflush_unmap_batch     tlb_ubc;
 
+#ifdef CONFIG_VE
+       struct kstat_lat_snap_struct    alloc_lat[KSTAT_ALLOCSTAT_NR];
+#endif
+
        union {
                refcount_t              rcu_users;
                struct rcu_head         rcu;
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index b9126fe06c3f..b48fba3fc9dd 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -208,6 +208,9 @@ struct signal_struct {
 #ifdef CONFIG_TASKSTATS
        struct taskstats *stats;
 #endif
+#ifdef CONFIG_VE
+       struct kstat_lat_snap_struct alloc_lat[KSTAT_ALLOCSTAT_NR];
+#endif
 #ifdef CONFIG_AUDIT
        unsigned audit_tty;
        struct tty_audit_buf *tty_audit_buf;
diff --git a/kernel/exit.c b/kernel/exit.c
index d7b901c6b07a..5a0607ee41b1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -750,6 +750,20 @@ static void check_stack_usage(void)
 static inline void check_stack_usage(void) {}
 #endif
 
+void kstat_add_dying(struct task_struct *tsk)
+{
+#ifdef CONFIG_VE
+       int i;
+
+       spin_lock_irq(&tsk->sighand->siglock);
+       for (i = 0; i < KSTAT_ALLOCSTAT_NR; i++) {
+               tsk->signal->alloc_lat[i].totlat += tsk->alloc_lat[i].totlat;
+               tsk->signal->alloc_lat[i].count += tsk->alloc_lat[i].count;
+       }
+       spin_unlock_irq(&tsk->sighand->siglock);
+#endif
+}
+
 void __noreturn do_exit(long code)
 {
        struct task_struct *tsk = current;
@@ -826,6 +840,8 @@ void __noreturn do_exit(long code)
 #endif
                if (tsk->mm)
                        setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
+       } else {
+               kstat_add_dying(tsk);
        }
        acct_collect(code, group_dead);
        if (group_dead)
diff --git a/kernel/fork.c b/kernel/fork.c
index 61adb6409f0d..7109198797ac 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -928,6 +928,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
        if (orig->cpus_ptr == &orig->cpus_mask)
                tsk->cpus_ptr = &tsk->cpus_mask;
 
+#ifdef CONFIG_VE
+       memset(tsk->alloc_lat, 0, sizeof(tsk->alloc_lat));
+#endif
+
        /*
         * One for the user space visible state that goes away when reaped.
         * One for the scheduler.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3fa186ba631f..ce726b40fcf6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5428,6 +5428,11 @@ static void __alloc_collect_stats(gfp_t gfp_mask, unsigned int order,
        cpu = smp_processor_id();
        KSTAT_LAT_PCPU_ADD(&kstat_glob.alloc_lat[ind], delta);
 
+       if (in_task()) {
+               current->alloc_lat[ind].totlat += delta;
+               current->alloc_lat[ind].count++;
+       }
+
        if (!page)
                kstat_glob.alloc_fails[cpu][ind]++;
        local_irq_restore(flags);
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH RHEL9 COMMIT] ve/page_alloc, kstat: account allocation latencies per-task and per-thread

Reply via email to