On Thu 19-02-26 15:58:46, JP Kobryn (Meta) wrote: > There are situations where reclaim kicks in on a system with free memory. > One possible cause is a NUMA imbalance scenario where one or more nodes are > under pressure. It would help if we could easily identify such nodes. > > Move the pgscan, pgsteal, and pgrefill counters from vm_event_item to > node_stat_item to provide per-node reclaim visibility. With these counters > as node stats, the values are now displayed in the per-node section of > /proc/zoneinfo, which allows for quick identification of the affected > nodes. > > /proc/vmstat continues to report the same counters, aggregated across all > nodes. But the ordering of these items within the readout changes as they > move from the vm events section to the node stats section. > > Memcg accounting of these counters is preserved. The relocated counters > remain visible in memory.stat alongside the existing aggregate pgscan and > pgsteal counters. > > However, this change affects how the global counters are accumulated. > Previously, the global event count update was gated on !cgroup_reclaim(), > excluding memcg-based reclaim from /proc/vmstat. Now that > mod_lruvec_state() is being used to update the counters, the global > counters will include all reclaim. This is consistent with how pgdemote > counters are already tracked. > > Finally, the virtio_balloon driver is updated to use > global_node_page_state() to fetch the counters, as they are no longer > accessible through the vm_events array. > > Signed-off-by: JP Kobryn <[email protected]> > Suggested-by: Johannes Weiner <[email protected]> > Acked-by: Michael S. Tsirkin <[email protected]> > Reviewed-by: Vlastimil Babka (SUSE) <[email protected]>
Acked-by: Michal Hocko <[email protected]> Thanks > --- > v5: > - rebase onto mm/mm-new > > v4: > https://lore.kernel.org/linux-mm/[email protected]/ > - remove unused memcg var from scan_folios() > > v3: > https://lore.kernel.org/linux-mm/[email protected]/ > - additionally move PGREFILL to node stats > > v2: > https://lore.kernel.org/linux-mm/[email protected]/ > - update commit message > - add entries to memory_stats array > - add switch cases in memcg_page_state_output_unit() > > v1: > https://lore.kernel.org/linux-mm/[email protected]/ > > drivers/virtio/virtio_balloon.c | 8 ++--- > include/linux/mmzone.h | 13 ++++++++ > include/linux/vm_event_item.h | 13 -------- > mm/memcontrol.c | 56 +++++++++++++++++++++++---------- > mm/vmscan.c | 39 ++++++++--------------- > mm/vmstat.c | 26 +++++++-------- > 6 files changed, 82 insertions(+), 73 deletions(-) > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c > index 4e549abe59ff..ab945532ceef 100644 > --- a/drivers/virtio/virtio_balloon.c > +++ b/drivers/virtio/virtio_balloon.c > @@ -369,13 +369,13 @@ static inline unsigned int > update_balloon_vm_stats(struct virtio_balloon *vb) > update_stat(vb, idx++, VIRTIO_BALLOON_S_ALLOC_STALL, stall); > > update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_SCAN, > - pages_to_bytes(events[PGSCAN_KSWAPD])); > + pages_to_bytes(global_node_page_state(PGSCAN_KSWAPD))); > update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_SCAN, > - pages_to_bytes(events[PGSCAN_DIRECT])); > + pages_to_bytes(global_node_page_state(PGSCAN_DIRECT))); > update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_RECLAIM, > - pages_to_bytes(events[PGSTEAL_KSWAPD])); > + pages_to_bytes(global_node_page_state(PGSTEAL_KSWAPD))); > update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_RECLAIM, > - pages_to_bytes(events[PGSTEAL_DIRECT])); > + pages_to_bytes(global_node_page_state(PGSTEAL_DIRECT))); > > #ifdef CONFIG_HUGETLB_PAGE > update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC, > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h > index 3e51190a55e4..546bca95ca40 100644 > --- a/include/linux/mmzone.h > +++ b/include/linux/mmzone.h > @@ -255,6 +255,19 @@ enum node_stat_item { > PGDEMOTE_DIRECT, > PGDEMOTE_KHUGEPAGED, > PGDEMOTE_PROACTIVE, > + PGSTEAL_KSWAPD, > + PGSTEAL_DIRECT, > + PGSTEAL_KHUGEPAGED, > + PGSTEAL_PROACTIVE, > + PGSTEAL_ANON, > + PGSTEAL_FILE, > + PGSCAN_KSWAPD, > + PGSCAN_DIRECT, > + PGSCAN_KHUGEPAGED, > + PGSCAN_PROACTIVE, > + PGSCAN_ANON, > + PGSCAN_FILE, > + PGREFILL, > #ifdef CONFIG_HUGETLB_PAGE > NR_HUGETLB, > #endif > diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h > index 22a139f82d75..03fe95f5a020 100644 > --- a/include/linux/vm_event_item.h > +++ b/include/linux/vm_event_item.h > @@ -38,21 +38,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, > PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE, > PGFAULT, PGMAJFAULT, > PGLAZYFREED, > - PGREFILL, > PGREUSE, > - PGSTEAL_KSWAPD, > - PGSTEAL_DIRECT, > - PGSTEAL_KHUGEPAGED, > - PGSTEAL_PROACTIVE, > - PGSCAN_KSWAPD, > - PGSCAN_DIRECT, > - PGSCAN_KHUGEPAGED, > - PGSCAN_PROACTIVE, > PGSCAN_DIRECT_THROTTLE, > - PGSCAN_ANON, > - PGSCAN_FILE, > - PGSTEAL_ANON, > - PGSTEAL_FILE, > #ifdef CONFIG_NUMA > PGSCAN_ZONE_RECLAIM_SUCCESS, > PGSCAN_ZONE_RECLAIM_FAILED, > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index 6fb9c999347b..0d834c47706f 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -331,6 +331,19 @@ static const unsigned int memcg_node_stat_items[] = { > PGDEMOTE_DIRECT, > PGDEMOTE_KHUGEPAGED, > PGDEMOTE_PROACTIVE, > + PGSTEAL_KSWAPD, > + PGSTEAL_DIRECT, > + PGSTEAL_KHUGEPAGED, > + PGSTEAL_PROACTIVE, > + PGSTEAL_ANON, > + PGSTEAL_FILE, > + PGSCAN_KSWAPD, > + PGSCAN_DIRECT, > + PGSCAN_KHUGEPAGED, > + PGSCAN_PROACTIVE, > + PGSCAN_ANON, > + PGSCAN_FILE, > + PGREFILL, > #ifdef CONFIG_HUGETLB_PAGE > NR_HUGETLB, > #endif > @@ -444,17 +457,8 @@ static const unsigned int memcg_vm_event_stat[] = { > #endif > PSWPIN, > PSWPOUT, > - PGSCAN_KSWAPD, > - PGSCAN_DIRECT, > - PGSCAN_KHUGEPAGED, > - PGSCAN_PROACTIVE, > - PGSTEAL_KSWAPD, > - PGSTEAL_DIRECT, > - PGSTEAL_KHUGEPAGED, > - PGSTEAL_PROACTIVE, > PGFAULT, > PGMAJFAULT, > - PGREFILL, > PGACTIVATE, > PGDEACTIVATE, > PGLAZYFREE, > @@ -1401,6 +1405,15 @@ static const struct memory_stat memory_stats[] = { > { "pgdemote_direct", PGDEMOTE_DIRECT }, > { "pgdemote_khugepaged", PGDEMOTE_KHUGEPAGED }, > { "pgdemote_proactive", PGDEMOTE_PROACTIVE }, > + { "pgsteal_kswapd", PGSTEAL_KSWAPD }, > + { "pgsteal_direct", PGSTEAL_DIRECT }, > + { "pgsteal_khugepaged", PGSTEAL_KHUGEPAGED }, > + { "pgsteal_proactive", PGSTEAL_PROACTIVE }, > + { "pgscan_kswapd", PGSCAN_KSWAPD }, > + { "pgscan_direct", PGSCAN_DIRECT }, > + { "pgscan_khugepaged", PGSCAN_KHUGEPAGED }, > + { "pgscan_proactive", PGSCAN_PROACTIVE }, > + { "pgrefill", PGREFILL }, > #ifdef CONFIG_NUMA_BALANCING > { "pgpromote_success", PGPROMOTE_SUCCESS }, > #endif > @@ -1444,6 +1457,15 @@ static int memcg_page_state_output_unit(int item) > case PGDEMOTE_DIRECT: > case PGDEMOTE_KHUGEPAGED: > case PGDEMOTE_PROACTIVE: > + case PGSTEAL_KSWAPD: > + case PGSTEAL_DIRECT: > + case PGSTEAL_KHUGEPAGED: > + case PGSTEAL_PROACTIVE: > + case PGSCAN_KSWAPD: > + case PGSCAN_DIRECT: > + case PGSCAN_KHUGEPAGED: > + case PGSCAN_PROACTIVE: > + case PGREFILL: > #ifdef CONFIG_NUMA_BALANCING > case PGPROMOTE_SUCCESS: > #endif > @@ -1562,15 +1584,15 @@ static void memcg_stat_format(struct mem_cgroup > *memcg, struct seq_buf *s) > > /* Accumulated memory events */ > memcg_seq_buf_print_stat(s, NULL, "pgscan", ' ', > - memcg_events(memcg, PGSCAN_KSWAPD) + > - memcg_events(memcg, PGSCAN_DIRECT) + > - memcg_events(memcg, PGSCAN_PROACTIVE) + > - memcg_events(memcg, PGSCAN_KHUGEPAGED)); > + memcg_page_state(memcg, PGSCAN_KSWAPD) + > + memcg_page_state(memcg, PGSCAN_DIRECT) + > + memcg_page_state(memcg, PGSCAN_PROACTIVE) + > + memcg_page_state(memcg, PGSCAN_KHUGEPAGED)); > memcg_seq_buf_print_stat(s, NULL, "pgsteal", ' ', > - memcg_events(memcg, PGSTEAL_KSWAPD) + > - memcg_events(memcg, PGSTEAL_DIRECT) + > - memcg_events(memcg, PGSTEAL_PROACTIVE) + > - memcg_events(memcg, PGSTEAL_KHUGEPAGED)); > + memcg_page_state(memcg, PGSTEAL_KSWAPD) + > + memcg_page_state(memcg, PGSTEAL_DIRECT) + > + memcg_page_state(memcg, PGSTEAL_PROACTIVE) + > + memcg_page_state(memcg, PGSTEAL_KHUGEPAGED)); > > for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) { > #ifdef CONFIG_MEMCG_V1 > diff --git a/mm/vmscan.c b/mm/vmscan.c > index 5fa6e6bd6540..c3dc7c7befac 100644 > --- a/mm/vmscan.c > +++ b/mm/vmscan.c > @@ -1984,7 +1984,7 @@ static unsigned long shrink_inactive_list(unsigned long > nr_to_scan, > unsigned long nr_taken; > struct reclaim_stat stat; > bool file = is_file_lru(lru); > - enum vm_event_item item; > + enum node_stat_item item; > struct pglist_data *pgdat = lruvec_pgdat(lruvec); > bool stalled = false; > > @@ -2010,10 +2010,8 @@ static unsigned long shrink_inactive_list(unsigned > long nr_to_scan, > > __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken); > item = PGSCAN_KSWAPD + reclaimer_offset(sc); > - if (!cgroup_reclaim(sc)) > - __count_vm_events(item, nr_scanned); > - count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned); > - __count_vm_events(PGSCAN_ANON + file, nr_scanned); > + mod_lruvec_state(lruvec, item, nr_scanned); > + mod_lruvec_state(lruvec, PGSCAN_ANON + file, nr_scanned); > > spin_unlock_irq(&lruvec->lru_lock); > > @@ -2030,10 +2028,8 @@ static unsigned long shrink_inactive_list(unsigned > long nr_to_scan, > stat.nr_demoted); > __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); > item = PGSTEAL_KSWAPD + reclaimer_offset(sc); > - if (!cgroup_reclaim(sc)) > - __count_vm_events(item, nr_reclaimed); > - count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed); > - __count_vm_events(PGSTEAL_ANON + file, nr_reclaimed); > + mod_lruvec_state(lruvec, item, nr_reclaimed); > + mod_lruvec_state(lruvec, PGSTEAL_ANON + file, nr_reclaimed); > > lru_note_cost_unlock_irq(lruvec, file, stat.nr_pageout, > nr_scanned - nr_reclaimed); > @@ -2120,9 +2116,7 @@ static void shrink_active_list(unsigned long nr_to_scan, > > __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken); > > - if (!cgroup_reclaim(sc)) > - __count_vm_events(PGREFILL, nr_scanned); > - count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned); > + mod_lruvec_state(lruvec, PGREFILL, nr_scanned); > > spin_unlock_irq(&lruvec->lru_lock); > > @@ -4537,7 +4531,7 @@ static int scan_folios(unsigned long nr_to_scan, struct > lruvec *lruvec, > { > int i; > int gen; > - enum vm_event_item item; > + enum node_stat_item item; > int sorted = 0; > int scanned = 0; > int isolated = 0; > @@ -4545,7 +4539,6 @@ static int scan_folios(unsigned long nr_to_scan, struct > lruvec *lruvec, > int scan_batch = min(nr_to_scan, MAX_LRU_BATCH); > int remaining = scan_batch; > struct lru_gen_folio *lrugen = &lruvec->lrugen; > - struct mem_cgroup *memcg = lruvec_memcg(lruvec); > > VM_WARN_ON_ONCE(!list_empty(list)); > > @@ -4596,13 +4589,9 @@ static int scan_folios(unsigned long nr_to_scan, > struct lruvec *lruvec, > } > > item = PGSCAN_KSWAPD + reclaimer_offset(sc); > - if (!cgroup_reclaim(sc)) { > - __count_vm_events(item, isolated); > - __count_vm_events(PGREFILL, sorted); > - } > - count_memcg_events(memcg, item, isolated); > - count_memcg_events(memcg, PGREFILL, sorted); > - __count_vm_events(PGSCAN_ANON + type, isolated); > + mod_lruvec_state(lruvec, item, isolated); > + mod_lruvec_state(lruvec, PGREFILL, sorted); > + mod_lruvec_state(lruvec, PGSCAN_ANON + type, isolated); > trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, scan_batch, > scanned, skipped, isolated, > type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON); > @@ -4705,7 +4694,7 @@ static int evict_folios(unsigned long nr_to_scan, > struct lruvec *lruvec, > LIST_HEAD(clean); > struct folio *folio; > struct folio *next; > - enum vm_event_item item; > + enum node_stat_item item; > struct reclaim_stat stat; > struct lru_gen_mm_walk *walk; > bool skip_retry = false; > @@ -4769,10 +4758,8 @@ static int evict_folios(unsigned long nr_to_scan, > struct lruvec *lruvec, > stat.nr_demoted); > > item = PGSTEAL_KSWAPD + reclaimer_offset(sc); > - if (!cgroup_reclaim(sc)) > - __count_vm_events(item, reclaimed); > - count_memcg_events(memcg, item, reclaimed); > - __count_vm_events(PGSTEAL_ANON + type, reclaimed); > + mod_lruvec_state(lruvec, item, reclaimed); > + mod_lruvec_state(lruvec, PGSTEAL_ANON + type, reclaimed); > > spin_unlock_irq(&lruvec->lru_lock); > > diff --git a/mm/vmstat.c b/mm/vmstat.c > index 86b14b0f77b5..44bbb7752f11 100644 > --- a/mm/vmstat.c > +++ b/mm/vmstat.c > @@ -1276,6 +1276,19 @@ const char * const vmstat_text[] = { > [I(PGDEMOTE_DIRECT)] = "pgdemote_direct", > [I(PGDEMOTE_KHUGEPAGED)] = "pgdemote_khugepaged", > [I(PGDEMOTE_PROACTIVE)] = "pgdemote_proactive", > + [I(PGSTEAL_KSWAPD)] = "pgsteal_kswapd", > + [I(PGSTEAL_DIRECT)] = "pgsteal_direct", > + [I(PGSTEAL_KHUGEPAGED)] = "pgsteal_khugepaged", > + [I(PGSTEAL_PROACTIVE)] = "pgsteal_proactive", > + [I(PGSTEAL_ANON)] = "pgsteal_anon", > + [I(PGSTEAL_FILE)] = "pgsteal_file", > + [I(PGSCAN_KSWAPD)] = "pgscan_kswapd", > + [I(PGSCAN_DIRECT)] = "pgscan_direct", > + [I(PGSCAN_KHUGEPAGED)] = "pgscan_khugepaged", > + [I(PGSCAN_PROACTIVE)] = "pgscan_proactive", > + [I(PGSCAN_ANON)] = "pgscan_anon", > + [I(PGSCAN_FILE)] = "pgscan_file", > + [I(PGREFILL)] = "pgrefill", > #ifdef CONFIG_HUGETLB_PAGE > [I(NR_HUGETLB)] = "nr_hugetlb", > #endif > @@ -1318,21 +1331,8 @@ const char * const vmstat_text[] = { > [I(PGMAJFAULT)] = "pgmajfault", > [I(PGLAZYFREED)] = "pglazyfreed", > > - [I(PGREFILL)] = "pgrefill", > [I(PGREUSE)] = "pgreuse", > - [I(PGSTEAL_KSWAPD)] = "pgsteal_kswapd", > - [I(PGSTEAL_DIRECT)] = "pgsteal_direct", > - [I(PGSTEAL_KHUGEPAGED)] = "pgsteal_khugepaged", > - [I(PGSTEAL_PROACTIVE)] = "pgsteal_proactive", > - [I(PGSCAN_KSWAPD)] = "pgscan_kswapd", > - [I(PGSCAN_DIRECT)] = "pgscan_direct", > - [I(PGSCAN_KHUGEPAGED)] = "pgscan_khugepaged", > - [I(PGSCAN_PROACTIVE)] = "pgscan_proactive", > [I(PGSCAN_DIRECT_THROTTLE)] = "pgscan_direct_throttle", > - [I(PGSCAN_ANON)] = "pgscan_anon", > - [I(PGSCAN_FILE)] = "pgscan_file", > - [I(PGSTEAL_ANON)] = "pgsteal_anon", > - [I(PGSTEAL_FILE)] = "pgsteal_file", > > #ifdef CONFIG_NUMA > [I(PGSCAN_ZONE_RECLAIM_SUCCESS)] = "zone_reclaim_success", > -- > 2.47.3 -- Michal Hocko SUSE Labs

