On Thu 19-02-26 15:58:46, JP Kobryn (Meta) wrote:
> There are situations where reclaim kicks in on a system with free memory.
> One possible cause is a NUMA imbalance scenario where one or more nodes are
> under pressure. It would help if we could easily identify such nodes.
> 
> Move the pgscan, pgsteal, and pgrefill counters from vm_event_item to
> node_stat_item to provide per-node reclaim visibility. With these counters
> as node stats, the values are now displayed in the per-node section of
> /proc/zoneinfo, which allows for quick identification of the affected
> nodes.
> 
> /proc/vmstat continues to report the same counters, aggregated across all
> nodes. But the ordering of these items within the readout changes as they
> move from the vm events section to the node stats section.
> 
> Memcg accounting of these counters is preserved. The relocated counters
> remain visible in memory.stat alongside the existing aggregate pgscan and
> pgsteal counters.
> 
> However, this change affects how the global counters are accumulated.
> Previously, the global event count update was gated on !cgroup_reclaim(),
> excluding memcg-based reclaim from /proc/vmstat. Now that
> mod_lruvec_state() is being used to update the counters, the global
> counters will include all reclaim. This is consistent with how pgdemote
> counters are already tracked.
> 
> Finally, the virtio_balloon driver is updated to use
> global_node_page_state() to fetch the counters, as they are no longer
> accessible through the vm_events array.
> 
> Signed-off-by: JP Kobryn <[email protected]>
> Suggested-by: Johannes Weiner <[email protected]>
> Acked-by: Michael S. Tsirkin <[email protected]>
> Reviewed-by: Vlastimil Babka (SUSE) <[email protected]>

Acked-by: Michal Hocko <[email protected]>
Thanks

> ---
> v5:
>       - rebase onto mm/mm-new
> 
> v4: https://lore.kernel.org/linux-mm/[email protected]/
>       - remove unused memcg var from scan_folios()
> 
> v3: https://lore.kernel.org/linux-mm/[email protected]/
>       - additionally move PGREFILL to node stats
> 
> v2: https://lore.kernel.org/linux-mm/[email protected]/
>       - update commit message
>       - add entries to memory_stats array
>       - add switch cases in memcg_page_state_output_unit()
> 
> v1: https://lore.kernel.org/linux-mm/[email protected]/
> 
>  drivers/virtio/virtio_balloon.c |  8 ++---
>  include/linux/mmzone.h          | 13 ++++++++
>  include/linux/vm_event_item.h   | 13 --------
>  mm/memcontrol.c                 | 56 +++++++++++++++++++++++----------
>  mm/vmscan.c                     | 39 ++++++++---------------
>  mm/vmstat.c                     | 26 +++++++--------
>  6 files changed, 82 insertions(+), 73 deletions(-)
> 
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 4e549abe59ff..ab945532ceef 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -369,13 +369,13 @@ static inline unsigned int update_balloon_vm_stats(struct virtio_balloon *vb)
>       update_stat(vb, idx++, VIRTIO_BALLOON_S_ALLOC_STALL, stall);
>  
>       update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_SCAN,
> -                 pages_to_bytes(events[PGSCAN_KSWAPD]));
> +                 pages_to_bytes(global_node_page_state(PGSCAN_KSWAPD)));
>       update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_SCAN,
> -                 pages_to_bytes(events[PGSCAN_DIRECT]));
> +                 pages_to_bytes(global_node_page_state(PGSCAN_DIRECT)));
>       update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_RECLAIM,
> -                 pages_to_bytes(events[PGSTEAL_KSWAPD]));
> +                 pages_to_bytes(global_node_page_state(PGSTEAL_KSWAPD)));
>       update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_RECLAIM,
> -                 pages_to_bytes(events[PGSTEAL_DIRECT]));
> +                 pages_to_bytes(global_node_page_state(PGSTEAL_DIRECT)));
>  
>  #ifdef CONFIG_HUGETLB_PAGE
>       update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC,
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 3e51190a55e4..546bca95ca40 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -255,6 +255,19 @@ enum node_stat_item {
>       PGDEMOTE_DIRECT,
>       PGDEMOTE_KHUGEPAGED,
>       PGDEMOTE_PROACTIVE,
> +     PGSTEAL_KSWAPD,
> +     PGSTEAL_DIRECT,
> +     PGSTEAL_KHUGEPAGED,
> +     PGSTEAL_PROACTIVE,
> +     PGSTEAL_ANON,
> +     PGSTEAL_FILE,
> +     PGSCAN_KSWAPD,
> +     PGSCAN_DIRECT,
> +     PGSCAN_KHUGEPAGED,
> +     PGSCAN_PROACTIVE,
> +     PGSCAN_ANON,
> +     PGSCAN_FILE,
> +     PGREFILL,
>  #ifdef CONFIG_HUGETLB_PAGE
>       NR_HUGETLB,
>  #endif
> diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
> index 22a139f82d75..03fe95f5a020 100644
> --- a/include/linux/vm_event_item.h
> +++ b/include/linux/vm_event_item.h
> @@ -38,21 +38,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
>               PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE,
>               PGFAULT, PGMAJFAULT,
>               PGLAZYFREED,
> -             PGREFILL,
>               PGREUSE,
> -             PGSTEAL_KSWAPD,
> -             PGSTEAL_DIRECT,
> -             PGSTEAL_KHUGEPAGED,
> -             PGSTEAL_PROACTIVE,
> -             PGSCAN_KSWAPD,
> -             PGSCAN_DIRECT,
> -             PGSCAN_KHUGEPAGED,
> -             PGSCAN_PROACTIVE,
>               PGSCAN_DIRECT_THROTTLE,
> -             PGSCAN_ANON,
> -             PGSCAN_FILE,
> -             PGSTEAL_ANON,
> -             PGSTEAL_FILE,
>  #ifdef CONFIG_NUMA
>               PGSCAN_ZONE_RECLAIM_SUCCESS,
>               PGSCAN_ZONE_RECLAIM_FAILED,
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 6fb9c999347b..0d834c47706f 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -331,6 +331,19 @@ static const unsigned int memcg_node_stat_items[] = {
>       PGDEMOTE_DIRECT,
>       PGDEMOTE_KHUGEPAGED,
>       PGDEMOTE_PROACTIVE,
> +     PGSTEAL_KSWAPD,
> +     PGSTEAL_DIRECT,
> +     PGSTEAL_KHUGEPAGED,
> +     PGSTEAL_PROACTIVE,
> +     PGSTEAL_ANON,
> +     PGSTEAL_FILE,
> +     PGSCAN_KSWAPD,
> +     PGSCAN_DIRECT,
> +     PGSCAN_KHUGEPAGED,
> +     PGSCAN_PROACTIVE,
> +     PGSCAN_ANON,
> +     PGSCAN_FILE,
> +     PGREFILL,
>  #ifdef CONFIG_HUGETLB_PAGE
>       NR_HUGETLB,
>  #endif
> @@ -444,17 +457,8 @@ static const unsigned int memcg_vm_event_stat[] = {
>  #endif
>       PSWPIN,
>       PSWPOUT,
> -     PGSCAN_KSWAPD,
> -     PGSCAN_DIRECT,
> -     PGSCAN_KHUGEPAGED,
> -     PGSCAN_PROACTIVE,
> -     PGSTEAL_KSWAPD,
> -     PGSTEAL_DIRECT,
> -     PGSTEAL_KHUGEPAGED,
> -     PGSTEAL_PROACTIVE,
>       PGFAULT,
>       PGMAJFAULT,
> -     PGREFILL,
>       PGACTIVATE,
>       PGDEACTIVATE,
>       PGLAZYFREE,
> @@ -1401,6 +1405,15 @@ static const struct memory_stat memory_stats[] = {
>       { "pgdemote_direct",            PGDEMOTE_DIRECT         },
>       { "pgdemote_khugepaged",        PGDEMOTE_KHUGEPAGED     },
>       { "pgdemote_proactive",         PGDEMOTE_PROACTIVE      },
> +     { "pgsteal_kswapd",             PGSTEAL_KSWAPD          },
> +     { "pgsteal_direct",             PGSTEAL_DIRECT          },
> +     { "pgsteal_khugepaged",         PGSTEAL_KHUGEPAGED      },
> +     { "pgsteal_proactive",          PGSTEAL_PROACTIVE       },
> +     { "pgscan_kswapd",              PGSCAN_KSWAPD           },
> +     { "pgscan_direct",              PGSCAN_DIRECT           },
> +     { "pgscan_khugepaged",          PGSCAN_KHUGEPAGED       },
> +     { "pgscan_proactive",           PGSCAN_PROACTIVE        },
> +     { "pgrefill",                   PGREFILL                },
>  #ifdef CONFIG_NUMA_BALANCING
>       { "pgpromote_success",          PGPROMOTE_SUCCESS       },
>  #endif
> @@ -1444,6 +1457,15 @@ static int memcg_page_state_output_unit(int item)
>       case PGDEMOTE_DIRECT:
>       case PGDEMOTE_KHUGEPAGED:
>       case PGDEMOTE_PROACTIVE:
> +     case PGSTEAL_KSWAPD:
> +     case PGSTEAL_DIRECT:
> +     case PGSTEAL_KHUGEPAGED:
> +     case PGSTEAL_PROACTIVE:
> +     case PGSCAN_KSWAPD:
> +     case PGSCAN_DIRECT:
> +     case PGSCAN_KHUGEPAGED:
> +     case PGSCAN_PROACTIVE:
> +     case PGREFILL:
>  #ifdef CONFIG_NUMA_BALANCING
>       case PGPROMOTE_SUCCESS:
>  #endif
> @@ -1562,15 +1584,15 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
>  
>       /* Accumulated memory events */
>       memcg_seq_buf_print_stat(s, NULL, "pgscan", ' ',
> -                              memcg_events(memcg, PGSCAN_KSWAPD) +
> -                              memcg_events(memcg, PGSCAN_DIRECT) +
> -                              memcg_events(memcg, PGSCAN_PROACTIVE) +
> -                              memcg_events(memcg, PGSCAN_KHUGEPAGED));
> +                              memcg_page_state(memcg, PGSCAN_KSWAPD) +
> +                              memcg_page_state(memcg, PGSCAN_DIRECT) +
> +                              memcg_page_state(memcg, PGSCAN_PROACTIVE) +
> +                              memcg_page_state(memcg, PGSCAN_KHUGEPAGED));
>       memcg_seq_buf_print_stat(s, NULL, "pgsteal", ' ',
> -                              memcg_events(memcg, PGSTEAL_KSWAPD) +
> -                              memcg_events(memcg, PGSTEAL_DIRECT) +
> -                              memcg_events(memcg, PGSTEAL_PROACTIVE) +
> -                              memcg_events(memcg, PGSTEAL_KHUGEPAGED));
> +                              memcg_page_state(memcg, PGSTEAL_KSWAPD) +
> +                              memcg_page_state(memcg, PGSTEAL_DIRECT) +
> +                              memcg_page_state(memcg, PGSTEAL_PROACTIVE) +
> +                              memcg_page_state(memcg, PGSTEAL_KHUGEPAGED));
>  
>       for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
>  #ifdef CONFIG_MEMCG_V1
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 5fa6e6bd6540..c3dc7c7befac 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1984,7 +1984,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
>       unsigned long nr_taken;
>       struct reclaim_stat stat;
>       bool file = is_file_lru(lru);
> -     enum vm_event_item item;
> +     enum node_stat_item item;
>       struct pglist_data *pgdat = lruvec_pgdat(lruvec);
>       bool stalled = false;
>  
> @@ -2010,10 +2010,8 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
>  
>       __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
>       item = PGSCAN_KSWAPD + reclaimer_offset(sc);
> -     if (!cgroup_reclaim(sc))
> -             __count_vm_events(item, nr_scanned);
> -     count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned);
> -     __count_vm_events(PGSCAN_ANON + file, nr_scanned);
> +     mod_lruvec_state(lruvec, item, nr_scanned);
> +     mod_lruvec_state(lruvec, PGSCAN_ANON + file, nr_scanned);
>  
>       spin_unlock_irq(&lruvec->lru_lock);
>  
> @@ -2030,10 +2028,8 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
>                                       stat.nr_demoted);
>       __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
>       item = PGSTEAL_KSWAPD + reclaimer_offset(sc);
> -     if (!cgroup_reclaim(sc))
> -             __count_vm_events(item, nr_reclaimed);
> -     count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed);
> -     __count_vm_events(PGSTEAL_ANON + file, nr_reclaimed);
> +     mod_lruvec_state(lruvec, item, nr_reclaimed);
> +     mod_lruvec_state(lruvec, PGSTEAL_ANON + file, nr_reclaimed);
>  
>       lru_note_cost_unlock_irq(lruvec, file, stat.nr_pageout,
>                                       nr_scanned - nr_reclaimed);
> @@ -2120,9 +2116,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
>  
>       __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
>  
> -     if (!cgroup_reclaim(sc))
> -             __count_vm_events(PGREFILL, nr_scanned);
> -     count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned);
> +     mod_lruvec_state(lruvec, PGREFILL, nr_scanned);
>  
>       spin_unlock_irq(&lruvec->lru_lock);
>  
> @@ -4537,7 +4531,7 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>  {
>       int i;
>       int gen;
> -     enum vm_event_item item;
> +     enum node_stat_item item;
>       int sorted = 0;
>       int scanned = 0;
>       int isolated = 0;
> @@ -4545,7 +4539,6 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>       int scan_batch = min(nr_to_scan, MAX_LRU_BATCH);
>       int remaining = scan_batch;
>       struct lru_gen_folio *lrugen = &lruvec->lrugen;
> -     struct mem_cgroup *memcg = lruvec_memcg(lruvec);
>  
>       VM_WARN_ON_ONCE(!list_empty(list));
>  
> @@ -4596,13 +4589,9 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>       }
>  
>       item = PGSCAN_KSWAPD + reclaimer_offset(sc);
> -     if (!cgroup_reclaim(sc)) {
> -             __count_vm_events(item, isolated);
> -             __count_vm_events(PGREFILL, sorted);
> -     }
> -     count_memcg_events(memcg, item, isolated);
> -     count_memcg_events(memcg, PGREFILL, sorted);
> -     __count_vm_events(PGSCAN_ANON + type, isolated);
> +     mod_lruvec_state(lruvec, item, isolated);
> +     mod_lruvec_state(lruvec, PGREFILL, sorted);
> +     mod_lruvec_state(lruvec, PGSCAN_ANON + type, isolated);
>       trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, scan_batch,
>                               scanned, skipped, isolated,
>                               type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
> @@ -4705,7 +4694,7 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>       LIST_HEAD(clean);
>       struct folio *folio;
>       struct folio *next;
> -     enum vm_event_item item;
> +     enum node_stat_item item;
>       struct reclaim_stat stat;
>       struct lru_gen_mm_walk *walk;
>       bool skip_retry = false;
> @@ -4769,10 +4758,8 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>                                       stat.nr_demoted);
>  
>       item = PGSTEAL_KSWAPD + reclaimer_offset(sc);
> -     if (!cgroup_reclaim(sc))
> -             __count_vm_events(item, reclaimed);
> -     count_memcg_events(memcg, item, reclaimed);
> -     __count_vm_events(PGSTEAL_ANON + type, reclaimed);
> +     mod_lruvec_state(lruvec, item, reclaimed);
> +     mod_lruvec_state(lruvec, PGSTEAL_ANON + type, reclaimed);
>  
>       spin_unlock_irq(&lruvec->lru_lock);
>  
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index 86b14b0f77b5..44bbb7752f11 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1276,6 +1276,19 @@ const char * const vmstat_text[] = {
>       [I(PGDEMOTE_DIRECT)]                    = "pgdemote_direct",
>       [I(PGDEMOTE_KHUGEPAGED)]                = "pgdemote_khugepaged",
>       [I(PGDEMOTE_PROACTIVE)]                 = "pgdemote_proactive",
> +     [I(PGSTEAL_KSWAPD)]                     = "pgsteal_kswapd",
> +     [I(PGSTEAL_DIRECT)]                     = "pgsteal_direct",
> +     [I(PGSTEAL_KHUGEPAGED)]                 = "pgsteal_khugepaged",
> +     [I(PGSTEAL_PROACTIVE)]                  = "pgsteal_proactive",
> +     [I(PGSTEAL_ANON)]                       = "pgsteal_anon",
> +     [I(PGSTEAL_FILE)]                       = "pgsteal_file",
> +     [I(PGSCAN_KSWAPD)]                      = "pgscan_kswapd",
> +     [I(PGSCAN_DIRECT)]                      = "pgscan_direct",
> +     [I(PGSCAN_KHUGEPAGED)]                  = "pgscan_khugepaged",
> +     [I(PGSCAN_PROACTIVE)]                   = "pgscan_proactive",
> +     [I(PGSCAN_ANON)]                        = "pgscan_anon",
> +     [I(PGSCAN_FILE)]                        = "pgscan_file",
> +     [I(PGREFILL)]                           = "pgrefill",
>  #ifdef CONFIG_HUGETLB_PAGE
>       [I(NR_HUGETLB)]                         = "nr_hugetlb",
>  #endif
> @@ -1318,21 +1331,8 @@ const char * const vmstat_text[] = {
>       [I(PGMAJFAULT)]                         = "pgmajfault",
>       [I(PGLAZYFREED)]                        = "pglazyfreed",
>  
> -     [I(PGREFILL)]                           = "pgrefill",
>       [I(PGREUSE)]                            = "pgreuse",
> -     [I(PGSTEAL_KSWAPD)]                     = "pgsteal_kswapd",
> -     [I(PGSTEAL_DIRECT)]                     = "pgsteal_direct",
> -     [I(PGSTEAL_KHUGEPAGED)]                 = "pgsteal_khugepaged",
> -     [I(PGSTEAL_PROACTIVE)]                  = "pgsteal_proactive",
> -     [I(PGSCAN_KSWAPD)]                      = "pgscan_kswapd",
> -     [I(PGSCAN_DIRECT)]                      = "pgscan_direct",
> -     [I(PGSCAN_KHUGEPAGED)]                  = "pgscan_khugepaged",
> -     [I(PGSCAN_PROACTIVE)]                   = "pgscan_proactive",
>       [I(PGSCAN_DIRECT_THROTTLE)]             = "pgscan_direct_throttle",
> -     [I(PGSCAN_ANON)]                        = "pgscan_anon",
> -     [I(PGSCAN_FILE)]                        = "pgscan_file",
> -     [I(PGSTEAL_ANON)]                       = "pgsteal_anon",
> -     [I(PGSTEAL_FILE)]                       = "pgsteal_file",
>  
>  #ifdef CONFIG_NUMA
>       [I(PGSCAN_ZONE_RECLAIM_SUCCESS)]        = "zone_reclaim_success",
> -- 
> 2.47.3

-- 
Michal Hocko
SUSE Labs

Reply via email to