Instead of splitting the large folio uniformly during truncation, try to
use the buddy-allocator-like folio_split() at the start and the end of a
truncation range to minimize the number of resulting folios, if it is
supported.  try_folio_split() is introduced to use folio_split() when it
is supported; it falls back to a uniform split otherwise.

For example, to truncate an order-4 folio
[0, 1, 2, 3, 4, 5, ..., 15]
between [3, 10] (inclusive), folio_split() splits the folio at 3 into
[0,1], [2], [3], [4..7], [8..15].  [3] and [4..7] can be dropped, and
[8..15] is kept with zeros in [8..10].  Then another folio_split() is
done at 10, so that [8..10] can be dropped.

One possible optimization is to make folio_split() split a folio based
on a given range, like [3..10] above.  But that complicates folio_split(),
so it will be investigated when necessary.

Link: https://lkml.kernel.org/r/20250226210032.2044041-8-...@nvidia.com
Signed-off-by: Zi Yan <z...@nvidia.com>
Cc: Baolin Wang <baolin.w...@linux.alibaba.com>
Cc: David Hildenbrand <da...@redhat.com>
Cc: Hugh Dickins <hu...@google.com>
Cc: John Hubbard <jhubb...@nvidia.com>
Cc: Kefeng Wang <wangkefeng.w...@huawei.com>
Cc: Kirill A. Shutemov <kirill.shute...@linux.intel.com>
Cc: Matthew Wilcox <wi...@infradead.org>
Cc: Miaohe Lin <linmia...@huawei.com>
Cc: Ryan Roberts <ryan.robe...@arm.com>
Cc: Yang Shi <y...@os.amperecomputing.com>
Cc: Yu Zhao <yuz...@google.com>
Cc: Kairui Song <kas...@tencent.com>
Signed-off-by: Andrew Morton <a...@linux-foundation.org>
---
 include/linux/huge_mm.h | 36 ++++++++++++++++++++++++++++++++++++
 mm/huge_memory.c        |  6 +++---
 mm/truncate.c           | 37 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 75 insertions(+), 4 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index e57e811cfd3c..e893d546a49f 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -345,6 +345,36 @@ int split_huge_page_to_list_to_order(struct page *page, 
struct list_head *list,
                unsigned int new_order);
 int min_order_for_split(struct folio *folio);
 int split_folio_to_list(struct folio *folio, struct list_head *list);
+bool uniform_split_supported(struct folio *folio, unsigned int new_order,
+               bool warns);
+bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
+               bool warns);
+int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
+               struct list_head *list);
+/*
+ * try_folio_split - try to split a @folio at @page using non uniform split.
+ * @folio: folio to be split
+ * @page: split to order-0 at the given page
+ * @list: store the after-split folios
+ *
+ * Try to split a @folio at @page using non uniform split to order-0, if
+ * non uniform split is not supported, fall back to uniform split.
+ *
+ * Return: 0: split is successful, otherwise split failed.
+ */
+static inline int try_folio_split(struct folio *folio, struct page *page,
+               struct list_head *list)
+{
+       int ret = min_order_for_split(folio);
+
+       if (ret < 0)
+               return ret;
+
+       if (!non_uniform_split_supported(folio, 0, false))
+               return split_huge_page_to_list_to_order(&folio->page, list,
+                               ret);
+       return folio_split(folio, ret, page, list);
+}
 static inline int split_huge_page(struct page *page)
 {
        struct folio *folio = page_folio(page);
@@ -537,6 +567,12 @@ static inline int split_folio_to_list(struct folio *folio, 
struct list_head *lis
        return 0;
 }
 
+static inline int try_folio_split(struct folio *folio, struct page *page,
+               struct list_head *list)
+{
+       return 0;
+}
+
 static inline void deferred_split_folio(struct folio *folio, bool 
partially_mapped) {}
 #define split_huge_pmd(__vma, __pmd, __address)        \
        do { } while (0)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d44b6d4b7f32..8a42150298de 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3648,7 +3648,7 @@ static int __split_unmapped_folio(struct folio *folio, 
int new_order,
        return ret;
 }
 
-static bool non_uniform_split_supported(struct folio *folio, unsigned int 
new_order,
+bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
                bool warns)
 {
        if (folio_test_anon(folio)) {
@@ -3680,7 +3680,7 @@ static bool non_uniform_split_supported(struct folio 
*folio, unsigned int new_or
 }
 
 /* See comments in non_uniform_split_supported() */
-static bool uniform_split_supported(struct folio *folio, unsigned int 
new_order,
+bool uniform_split_supported(struct folio *folio, unsigned int new_order,
                bool warns)
 {
        if (folio_test_anon(folio)) {
@@ -3999,7 +3999,7 @@ int split_huge_page_to_list_to_order(struct page *page, 
struct list_head *list,
  *
  * After split, folio is left locked for caller.
  */
-static int folio_split(struct folio *folio, unsigned int new_order,
+int folio_split(struct folio *folio, unsigned int new_order,
                struct page *split_at, struct list_head *list)
 {
        return __folio_split(folio, new_order, split_at, &folio->page, list,
diff --git a/mm/truncate.c b/mm/truncate.c
index 0395e578d946..0790b6227512 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -192,6 +192,7 @@ bool truncate_inode_partial_folio(struct folio *folio, 
loff_t start, loff_t end)
 {
        loff_t pos = folio_pos(folio);
        unsigned int offset, length;
+       struct page *split_at, *split_at2;
 
        if (pos < start)
                offset = start - pos;
@@ -221,8 +222,42 @@ bool truncate_inode_partial_folio(struct folio *folio, 
loff_t start, loff_t end)
                folio_invalidate(folio, offset, length);
        if (!folio_test_large(folio))
                return true;
-       if (split_folio(folio) == 0)
+
+       split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE);
+       split_at2 = folio_page(folio,
+                       PAGE_ALIGN_DOWN(offset + length) / PAGE_SIZE);
+
+       if (!try_folio_split(folio, split_at, NULL)) {
+               /*
+                * try to split at offset + length to make sure folios within
+                * the range can be dropped, especially to avoid memory waste
+                * for shmem truncate
+                */
+               struct folio *folio2 = page_folio(split_at2);
+
+               if (!folio_try_get(folio2))
+                       goto no_split;
+
+               if (!folio_test_large(folio2))
+                       goto out;
+
+               if (!folio_trylock(folio2))
+                       goto out;
+
+               /*
+                * make sure folio2 is large and does not change its mapping.
+                * Its split result does not matter here.
+                */
+               if (folio_test_large(folio2) &&
+                   folio2->mapping == folio->mapping)
+                       try_folio_split(folio2, split_at2, NULL);
+
+               folio_unlock(folio2);
+out:
+               folio_put(folio2);
+no_split:
                return true;
+       }
        if (folio_test_dirty(folio))
                return false;
        truncate_inode_folio(folio->mapping, folio);
-- 
2.47.2


Reply via email to