On 6/2/26 19:23, Nico Pache wrote:
> 
> 
> On 6/1/26 7:15 AM, David Hildenbrand (Arm) wrote:
>>>
>>> So I looked into your items below. It seems logical, and I think it
>>> works the same way; however, your method seems slightly harder to
>>> understand due to all the edge cases and more error-prone to future
>>> changes (the stack holds implicit knowledge of the offset/order that
>>> must now be tracked in the edge cases).
>>>
>>> Given the stack is 24 bytes, I'm not sure if the extra complexity is
>>> worth saving that small amount of memory. Although we would also be
>>> getting rid of (3?) functions, so both approaches have pros and cons.
>>
>> I consider a simple forward loop over the offset ... less complexity 
>> compared to
>> a stack structure :)
>>
>>>
>>> I will implement a patch comparing your solution against mine and send
>>> it here, then we can decide which approach is better.
>>
>> Right, throw it over the fence and I'll see how to improve it further.
> 
> Ok heres what the diff looks like on top of my V19. 
> 
> you can access the tree here 
> https://gitlab.com/npache/linux/-/commits/mthp-v19?ref_type=heads for easier 
> review.
> 
> So far I have no problem with this approach it appeared cleaner than i 
> thought. Did some light testing. Gonna throw it more through the ringer 
> tomorrow. 

It's very clean.

Almost too nice to be true ;)

[...]

>       unsigned int nr_occupied_ptes, nr_ptes, max_ptes_none;
>       enum scan_result last_result = SCAN_FAIL;
> -     int collapsed = 0, stack_size = 0;
> +     int collapsed = 0;
>       bool alloc_failed = false;
>       unsigned long collapse_address;
> -     struct mthp_range range;
> -     u16 offset;
> -     u8 order;
> +     unsigned int offset = 0;
> +     unsigned int order = HPAGE_PMD_ORDER;


In include/linux/huge_mm.h we have

        highest_order()

and

        next_order()

They essentially allow you to get rid of the test_bit() and just jump to the
next enabled order right away.

I assume with only a handful of enabled_orders, that might be much more 
efficient.

I tried to optimize it and ended with the following, which is completely 
untested.

I think it might make sense to defer that and start with the simple approach 
you have.

I do wonder, though, about the last hunk below: should we bail out early if
enabled_orders is suddenly 0?



>From 0d8ff955b3071f354b7fc9b627820fa374fa99dc Mon Sep 17 00:00:00 2001
From: "David Hildenbrand (Arm)" <[email protected]>
Date: Wed, 3 Jun 2026 11:52:44 +0200
Subject: [PATCH] tmp

Signed-off-by: David Hildenbrand (Arm) <[email protected]>
---
 include/linux/huge_mm.h |   5 ++
 mm/khugepaged.c         | 132 ++++++++++++++++++++++------------------
 2 files changed, 78 insertions(+), 59 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 48496f09909b..099318bc1181 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -205,6 +205,11 @@ static inline int highest_order(unsigned long orders)
        return fls_long(orders) - 1;
 }
 
+static inline int smallest_order(unsigned long orders)
+{
+       return __ffs(orders);
+}
+
 static inline int next_order(unsigned long *orders, int prev)
 {
        *orders &= ~BIT(prev);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 6de935e76ceb..49be9d1a88cb 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -99,8 +99,6 @@ static DEFINE_READ_MOSTLY_HASHTABLE(mm_slots_hash, 
MM_SLOTS_HASH_BITS);
 
 static struct kmem_cache *mm_slot_cache __ro_after_init;
 
-#define KHUGEPAGED_MIN_MTHP_ORDER      2
-
 struct collapse_control {
        bool is_khugepaged;
 
@@ -1454,76 +1452,86 @@ static enum scan_result collapse_huge_page(struct 
mm_struct *mm, unsigned long s
  */
 static enum scan_result mthp_collapse(struct mm_struct *mm,
                unsigned long address, int referenced, int unmapped,
-               struct collapse_control *cc, unsigned long enabled_orders)
+               struct collapse_control *cc, const unsigned long enabled_orders)
 {
-       unsigned int nr_occupied_ptes, nr_ptes, max_ptes_none;
        enum scan_result last_result = SCAN_FAIL;
        int collapsed = 0;
        bool alloc_failed = false;
        unsigned long collapse_address;
        unsigned int offset = 0;
-       unsigned int order = HPAGE_PMD_ORDER;
 
+       /* We cannot collapse anon folios to order-1 or order-0. */
+       VM_WARN_ON_ONCE(!enabled_order || (enabled_orders & 0x3));
 
        while (offset < HPAGE_PMD_NR) {
-               nr_ptes = 1UL << order;
-
-               if (!test_bit(order, &enabled_orders))
-                       goto next_order;
-
-               max_ptes_none = collapse_max_ptes_none(cc, NULL, order);
-               nr_occupied_ptes = bitmap_weight_from(cc->mthp_present_ptes, 
offset,
-                                                     offset + nr_ptes);
-
-               if (nr_occupied_ptes >= nr_ptes - max_ptes_none) {
-                       enum scan_result ret;
-
-                       collapse_address = address + offset * PAGE_SIZE;
-                       ret = collapse_huge_page(mm, collapse_address, 
referenced,
-                                                unmapped, cc, order);
-
-                       switch (ret) {
-                       /* Cases where we continue to next collapse candidate */
-                       case SCAN_SUCCEED:
-                               collapsed += nr_ptes;
-                               fallthrough;
-                       case SCAN_PTE_MAPPED_HUGEPAGE:
-                               goto next_offset;
-                       /* Cases where lower orders might still succeed */
-                       case SCAN_ALLOC_HUGE_PAGE_FAIL:
-                               alloc_failed = true;
-                               fallthrough;
-                       case SCAN_LACK_REFERENCED_PAGE:
-                       case SCAN_EXCEED_NONE_PTE:
-                       case SCAN_EXCEED_SWAP_PTE:
-                       case SCAN_EXCEED_SHARED_PTE:
-                       case SCAN_PAGE_LOCK:
-                       case SCAN_PAGE_COUNT:
-                       case SCAN_PAGE_NULL:
-                       case SCAN_DEL_PAGE_LRU:
-                       case SCAN_PTE_NON_PRESENT:
-                       case SCAN_PTE_UFFD_WP:
-                       case SCAN_PAGE_LAZYFREE:
-                               last_result = ret;
-                               goto next_order;
-                       /* Cases where no further collapse is possible */
-                       case SCAN_PMD_MAPPED:
-                               fallthrough;
-                       default:
-                               last_result = ret;
-                               goto done;
+               /*
+                * We can only collapse to a maximum order for a given offset.
+                * So ignore all orders that do not apply to the current
+                * offset, then see if any order to collapse to remains.
+                */
+               unsigned long orders = enabled_orders & GENMASK(__ffs(offset), 
0);
+               unsigned int order = highest_order(orders);
+
+               while (order) {
+                       const unsigned int nr_ptes = 1UL << order;
+                       unsigned int nr_occupied_ptes, max_ptes_none;
+
+                       max_ptes_none = collapse_max_ptes_none(cc, NULL, order);
+                       nr_occupied_ptes = 
bitmap_weight_from(cc->mthp_present_ptes, offset,
+                                                             offset + nr_ptes);
+
+                       if (nr_occupied_ptes >= nr_ptes - max_ptes_none) {
+                               enum scan_result ret;
+
+                               collapse_address = address + offset * PAGE_SIZE;
+                               ret = collapse_huge_page(mm, collapse_address, 
referenced,
+                                                        unmapped, cc, order);
+
+                               switch (ret) {
+                               /* Cases where we continue to next collapse 
candidate */
+                               case SCAN_SUCCEED:
+                                       collapsed += nr_ptes;
+                                       fallthrough;
+                               case SCAN_PTE_MAPPED_HUGEPAGE:
+                                       goto next_offset;
+                               /* Cases where lower orders might still succeed 
*/
+                               case SCAN_ALLOC_HUGE_PAGE_FAIL:
+                                       alloc_failed = true;
+                                       fallthrough;
+                               case SCAN_LACK_REFERENCED_PAGE:
+                               case SCAN_EXCEED_NONE_PTE:
+                               case SCAN_EXCEED_SWAP_PTE:
+                               case SCAN_EXCEED_SHARED_PTE:
+                               case SCAN_PAGE_LOCK:
+                               case SCAN_PAGE_COUNT:
+                               case SCAN_PAGE_NULL:
+                               case SCAN_DEL_PAGE_LRU:
+                               case SCAN_PTE_NON_PRESENT:
+                               case SCAN_PTE_UFFD_WP:
+                               case SCAN_PAGE_LAZYFREE:
+                                       last_result = ret;
+                                       break;
+                               /* Cases where no further collapse is possible 
*/
+                               case SCAN_PMD_MAPPED:
+                                       fallthrough;
+                               default:
+                                       last_result = ret;
+                                       goto done;
+                               }
                        }
-               }
 
-next_order:
-               if (order > KHUGEPAGED_MIN_MTHP_ORDER &&
-                       (BIT(order) - 1) & enabled_orders) {
-                       order = order - 1;
-                       continue;
+                       order = next_order(&orders, order);
                }
+
 next_offset:
-               offset += nr_ptes;
-               order = min_t(int, __ffs(offset), HPAGE_PMD_ORDER);
+               /*
+                * Continue with the next collapse candidate. If we do not
+                * have an order, skip to nest smallest mTHP we can collapse to.
+                */
+               if (order)
+                       offset += 1UL << order;
+               else
+                       offset = ALIGN(offset + 1, 
smallest_order(enabled_orders));
        }
 done:
        if (collapsed)
@@ -1567,6 +1575,12 @@ static enum scan_result collapse_scan_pmd(struct 
mm_struct *mm,
 
        enabled_orders = collapse_allowable_orders(vma, vma->vm_flags, 
tva_flags);
 
+       if (unlikely(!enabled_orders)) {
+               cc->progress++;
+               result = SCAN_SUCCEED;
+               goto out;
+       }
+
        /*
         * If PMD is the only enabled order, enforce max_ptes_none, otherwise
         * scan all pages to populate the bitmap for mTHP collapse.
-- 
2.43.0


-- 
Cheers,

David

Reply via email to