The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after ark-5.14 ------> commit 4bb06a1a65c1417f64af16a8f0b4451c636ac97f Author: Pavel Tikhomirov <ptikhomi...@virtuozzo.com> Date: Tue Oct 12 16:18:20 2021 +0300
ve/memcg: Virtualize /proc/vmstat view inside CT When users read /proc/vmstat inside a container, they expect to see aggregated stats of all container cgroups. Note: We've had /proc/meminfo, /proc/vmstat, /proc/swaps and sysinfo() virtualized in containers in VZ7; we need the same in VZ8. In VZ7 this was done via the virtinfo subsystem, which has been dropped. https://jira.sw.ru/browse/PSBM-127780 Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com> +++ ve/memcg: Cleanup /proc/vmstat virtualization https://jira.sw.ru/browse/PSBM-131992 Signed-off-by: Konstantin Khorenko <khore...@virtuozzo.com> +++ ve/memcg: Fix /proc/vmstat virtualization (drop extra recursion) https://jira.sw.ru/browse/PSBM-131992 Signed-off-by: Konstantin Khorenko <khore...@virtuozzo.com> +++ ve/memcg: Honor changing per-memcg s[un]reclaimable counters to bytes in per-CT /proc/vmstat RHEL8.4 has the following ms commit backported: d42f3245c7e2 ("mm: memcg: convert vmstat slab counters to bytes") So, update places where we use per-memcg counters NR_SLAB_[UN]RECLAIMABLE_B accordingly. https://jira.sw.ru/browse/PSBM-132893 Signed-off-by: Konstantin Khorenko <khore...@virtuozzo.com> gorcunov@: In ms commit f19298b9516c ("mm/vmstat: convert NUMA statistics to basic NUMA counters") the constant NR_VM_NUMA_STAT_ITEMS was renamed to NR_VM_NUMA_EVENT_ITEMS. 
Signed-off-by: Cyrill Gorcunov <gorcu...@virtuozzo.com> --- include/linux/memcontrol.h | 6 ++++++ mm/memcontrol.c | 40 ++++++++++++++++++++++++++++++++++++++++ mm/vmstat.c | 16 +++++++++++++++- 3 files changed, 61 insertions(+), 1 deletion(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1311eec888b1..d56d77da80f9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1166,6 +1166,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, unsigned long *total_scanned); void mem_cgroup_fill_meminfo(struct mem_cgroup *memcg, struct meminfo *mi); +void mem_cgroup_fill_vmstat(struct mem_cgroup *memcg, unsigned long *stats); void memcg_neg_dentry_inc(struct dentry *dentry); void memcg_neg_dentry_dec(struct dentry *dentry); @@ -1571,6 +1572,11 @@ static void mem_cgroup_fill_meminfo(struct mem_cgroup *memcg, struct meminfo *mi { } +static inline void mem_cgroup_fill_vmstat(struct mem_cgroup *memcg, + unsigned long *stats) +{ +} + static inline void memcg_neg_dentry_inc(struct dentry *dentry) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a5b893a4721f..995e41ab3227 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4092,6 +4092,46 @@ void mem_cgroup_fill_meminfo(struct mem_cgroup *memcg, struct meminfo *mi) /* mi->locked = 0; */ } +void mem_cgroup_fill_vmstat(struct mem_cgroup *memcg, unsigned long *stats) +{ + int i; + unsigned long limit = READ_ONCE(memcg->memory.max); + unsigned long memory = page_counter_read(&memcg->memory); + unsigned long *zone_stats = stats; + unsigned long *node_stats = stats + + NR_VM_ZONE_STAT_ITEMS + + NR_VM_NUMA_EVENT_ITEMS; + unsigned long *vm_stats = node_stats + + NR_VM_NODE_STAT_ITEMS + + NR_VM_WRITEBACK_STAT_ITEMS; + + zone_stats[NR_FREE_PAGES] = memory > limit ? 
0 : limit - memory; + + for (i = LRU_BASE; i < NR_LRU_LISTS; i++) { + node_stats[NR_LRU_BASE + i] = + mem_cgroup_nr_lru_pages(memcg, BIT(i), true); + } + + node_stats[NR_ANON_MAPPED] = node_stats[NR_ACTIVE_ANON] + + node_stats[NR_INACTIVE_ANON]; + node_stats[NR_FILE_PAGES] = node_stats[NR_ACTIVE_FILE] + + node_stats[NR_INACTIVE_FILE]; + + node_stats[NR_SLAB_RECLAIMABLE_B] = + memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) >> PAGE_SHIFT; + node_stats[NR_SLAB_UNRECLAIMABLE_B] = + memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE_B) >> PAGE_SHIFT; + node_stats[NR_FILE_MAPPED] = memcg_page_state(memcg, NR_FILE_MAPPED); + node_stats[NR_SHMEM] = memcg_page_state(memcg, NR_SHMEM); + +#ifdef CONFIG_VM_EVENT_COUNTERS + vm_stats[PSWPIN] = memcg_events(memcg, PSWPIN); + vm_stats[PSWPOUT] = memcg_events(memcg, PSWPOUT); + vm_stats[PGFAULT] = memcg_events(memcg, PGFAULT); + vm_stats[PGMAJFAULT] = memcg_events(memcg, PGMAJFAULT); +#endif +} + int sysctl_ve_overcommit_memory __read_mostly; static int mem_cgroup_enough_memory(struct mem_cgroup *memcg, long pages) diff --git a/mm/vmstat.c b/mm/vmstat.c index 30436a098821..9b3129731404 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -28,6 +28,8 @@ #include <linux/mm_inline.h> #include <linux/page_ext.h> #include <linux/page_owner.h> +#include <linux/memcontrol.h> +#include <linux/ve.h> #include "internal.h" @@ -1708,8 +1710,18 @@ static const struct seq_operations zoneinfo_op = { (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? 
\ NR_VM_EVENT_ITEMS : 0)) +static void fill_vmstat_ve(unsigned long *stat, struct ve_struct *ve) +{ + struct cgroup_subsys_state *css; + + css = ve_get_init_css(ve, memory_cgrp_id); + mem_cgroup_fill_vmstat(mem_cgroup_from_css(css), stat); + css_put(css); +} + static void *vmstat_start(struct seq_file *m, loff_t *pos) { + struct ve_struct *ve; unsigned long *v; int i; @@ -1723,8 +1735,10 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) if (!v) return ERR_PTR(-ENOMEM); - if (!ve_is_super(get_exec_env())) { + ve = get_exec_env(); + if (!ve_is_super(ve)) { memset(v, 0, NR_VMSTAT_ITEMS * sizeof(unsigned long)); + fill_vmstat_ve(v, ve); return (unsigned long *)m->private + *pos; } _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel