mem_cgroup_replace_folio() was checking the cache flag on the new folio
instead of the old one, causing a cache counter imbalance when replacing
page cache pages.
The new folio is freshly allocated and never has the cache flag set, so
we need to copy the flag from the old folio and charge the cache counter
accordingly.
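The replacement path should thus derive the flag from the old folio. A
minimal sketch of the intended logic (condensed from the
mem_cgroup_replace_folio() hunk below):

    /* the old folio owns the page cache charge; the new one starts clean */
    cache_flag = READ_ONCE(old->memcg_data) & MEMCG_DATA_PGCACHE;
    if (cache_flag && !mem_cgroup_is_root(memcg))
            page_counter_charge(&memcg->cache, folio_nr_pages(new));
    commit_charge(new, memcg, cache_flag);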
This fixes potential cache accounting issues during page cache replacement
operations like page migration between NUMA nodes.
Another possible cause of the imbalance is commit_charge(), which
unconditionally drops the MEMCG_DATA_PGCACHE flag from folio->memcg_data,
so enhance commit_charge() to preserve the flag.
A new argument was added (rather than setting the flag separately) so
that new commit_charge() call sites appearing on rebases cannot be
missed: they fail to compile until the cache flag is decided, as
illustrated below.
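For illustration, a hypothetical caller brought in by a rebase that still
uses the old two-argument form now breaks the build instead of silently
losing the flag:

    commit_charge(folio, memcg);        /* rebase leftover: fails to build */
    commit_charge(folio, memcg, 0);     /* compiles; not a page cache charge */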
Fixes: 6a2ca4d515c5 ("mm: Memory cgroup page cache limit")
https://virtuozzo.atlassian.net/browse/VSTOR-111756
Signed-off-by: Konstantin Khorenko <khore...@virtuozzo.com>
Feature: mm: Memory cgroup page cache limit
---
v3:
* enhance mem_cgroup_commit_charge() with a cache parameter as well
* in mem_cgroup_replace_folio(), put the cache counter handling under
the !mem_cgroup_is_root() condition, like the other memory counters
v2:
* fixed 2 other functions where commit_charge() could lose the cache flag
* fixed mem_cgroup_move_account(), which also rewrote folio->memcg_data
without preserving the cache flag
* moved the sanity checks under the if condition
include/linux/memcontrol.h | 6 ++--
mm/hugetlb.c | 3 +-
mm/memcontrol-v1.c | 5 ++-
mm/memcontrol.c | 70 +++++++++++++++++++-------------------
4 files changed, 45 insertions(+), 39 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index c97bd1a4b3c95..e0df2d395d03f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -696,7 +696,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
page_counter_read(&memcg->memory);
}
-void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg);
+void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg,
+ bool cache_charge);
int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);
@@ -1256,7 +1257,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
}
static inline void mem_cgroup_commit_charge(struct folio *folio,
- struct mem_cgroup *memcg)
+ struct mem_cgroup *memcg,
+ bool cache_charge)
{
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bd46cf476a0e3..93886dba813e0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3106,7 +3106,8 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
}
if (!memcg_charge_ret)
- mem_cgroup_commit_charge(folio, memcg);
+ /* huge pages are not used for page cache */
+ mem_cgroup_commit_charge(folio, memcg, false);
mem_cgroup_put(memcg);
return folio;
diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index 756bc518cee3f..fd0a563a352b1 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -852,7 +852,10 @@ static int mem_cgroup_move_account(struct folio *folio,
/* Warning should never happen, so don't worry about refcount non-0 */
WARN_ON_ONCE(folio_unqueue_deferred_split(folio));
- folio->memcg_data = (unsigned long)to;
+ /* Preserve MEMCG_DATA_PGCACHE flag if it was set */
+ WRITE_ONCE(folio->memcg_data,
+ (unsigned long)to |
+ (READ_ONCE(folio->memcg_data) & MEMCG_DATA_PGCACHE));
__folio_memcg_unlock(from);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d0fe610306a02..28b8f50504e06 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2524,7 +2524,8 @@ void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
page_counter_uncharge(&memcg->memsw, nr_pages);
}
-static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
+static void commit_charge(struct folio *folio, struct mem_cgroup *memcg,
+ unsigned long cache_flag)
{
VM_BUG_ON_FOLIO(folio_memcg_charged(folio), folio);
/*
@@ -2536,18 +2537,20 @@ static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
* - exclusive reference
* - mem_cgroup_trylock_pages()
*/
- folio->memcg_data = (unsigned long)memcg;
+ WRITE_ONCE(folio->memcg_data, (unsigned long)memcg | cache_flag);
}
/**
* mem_cgroup_commit_charge - commit a previously successful try_charge().
* @folio: folio to commit the charge to.
* @memcg: memcg previously charged.
+ * @cache_charge: whether the folio is charged as page cache.
*/
-void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
+void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg,
+ bool cache_charge)
{
css_get(&memcg->css);
- commit_charge(folio, memcg);
+ commit_charge(folio, memcg, cache_charge ? MEMCG_DATA_PGCACHE : 0);
memcg1_commit_charge(folio, memcg);
}
@@ -5267,18 +5270,13 @@ static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
ret = try_charge(memcg, gfp, folio_nr_pages(folio), cache_charge);
if (ret)
goto out;
-
- mem_cgroup_commit_charge(folio, memcg);
-
/*
* We always cleanup this flag on uncharging, it means
* that during charging we shouldn't have this flag set
*/
-
VM_BUG_ON(folio_memcg_cache(folio));
- if (cache_charge)
- WRITE_ONCE(folio->memcg_data,
- READ_ONCE(folio->memcg_data) | MEMCG_DATA_PGCACHE);
+
+ mem_cgroup_commit_charge(folio, memcg, cache_charge);
out:
return ret;
}
@@ -5551,6 +5549,7 @@ void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
{
struct mem_cgroup *memcg;
long nr_pages = folio_nr_pages(new);
+ unsigned long cache_flag;
VM_BUG_ON_FOLIO(!folio_test_locked(old), old);
VM_BUG_ON_FOLIO(!folio_test_locked(new), new);
@@ -5575,32 +5574,33 @@ void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
if (do_memsw_account())
page_counter_charge(&memcg->memsw, nr_pages);
}
-
- /*
- * finist explained the idea behind adding a WARN_ON() here:
- * - we do not want to check flags correctness on each flag change
- * because of performance
- * - we do want to have a warning in case we somehow messed-up and
- * have got a folio with wrong bits set
- * - we do not insist to catch every first buggy page with wrong
- * bits
- *
- * But we still want to get a warning about the problem sooner or
- * later if the problem with flags exists.
- *
- * To achieve this check if a folio that is marked as cache does
- * not have any other incompatible flags set.
- */
- WARN_ON(folio_memcg_cache(new) &&
- (folio_test_slab(new) || folio_test_anon(new) ||
- folio_test_swapbacked(new) || folio_test_swapcache(new) ||
- folio_test_mappedtodisk(new) || folio_test_ksm(new)));
-
- if (folio_memcg_cache(new))
+ cache_flag = READ_ONCE(old->memcg_data) & MEMCG_DATA_PGCACHE;
+ if (cache_flag && !mem_cgroup_is_root(memcg)) {
page_counter_charge(&memcg->cache, nr_pages);
+ /*
+ * finist explained the idea behind adding a WARN_ON() here:
+ * - we do not want to check flags correctness on each flag change
+ * because of performance
+ * - we do want to have a warning in case we somehow messed-up and
+ * have got a folio with wrong bits set
+ * - we do not insist to catch every first buggy page with wrong
+ * bits
+ *
+ * But we still want to get a warning about the problem sooner or
+ * later if the problem with flags exists.
+ *
+ * To achieve this check if a folio that is marked as cache does
+ * not have any other incompatible flags set.
+ */
+ WARN_ON(folio_test_slab(new) || folio_test_swapcache(new) ||
+ folio_test_anon(new) || folio_test_swapbacked(new) ||
+ folio_test_ksm(new) || folio_test_mappedtodisk(new));
+ }
+
css_get(&memcg->css);
- commit_charge(new, memcg);
+ commit_charge(new, memcg, cache_flag);
+
memcg1_commit_charge(new, memcg);
}
@@ -5639,7 +5639,7 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
return;
/* Transfer the charge and the css ref */
- commit_charge(new, memcg);
+ commit_charge(new, memcg, READ_ONCE(old->memcg_data) & MEMCG_DATA_PGCACHE);
/* Warning should never happen, so don't worry about refcount non-0 */
WARN_ON_ONCE(folio_unqueue_deferred_split(old));