On 1 Sep 2025, at 15:10, David Hildenbrand wrote:

>>>>> (a) Will this actually do anything? Also, maybe it does now, but can't 
>>>>> the kernel just optimize that out in the future?
>>>>
>>>> It remaps each subpage of 4 PMD THPs into a contiguous 2MB vaddr range and
>>>> perform split on that range.
>>>
>>> I'm afraid I am missing the "why".
>>>
>>> I would have thought that a "split_pte_mapped_thp" test would want to 
>>> pte-map THPs
>>> to the see if they can be split.
>>>
>>> Why is the mremap required? IOW, what exactly is the test trying to test 
>>> that
>>> exceeds "split_pte_mapped_thp" ?
>>
>> IMHO, it is an interesting test case for splitting a THP when only a subpage
>> is mapped into a vaddr range and in a contiguous vaddr each page comes from
>> different THPs.
>
> Right. Slightly similar to just MAV_DONTNEED'ing the other PTEs and trying to
> split the bigger range.
>
> Of course, if you involve more mremap, the RMAP logic of installing migration
> ptes will get stressed more.
>
> So yes, there are various ways on how to stress the RMAP walk when splitting.
>
>> The mprotect test case you are mentioning would still have all
>> subpages mapped by contiguous vaddrs.
>
> Right, it would not stress RMAP as much.
>
>>
>> But if you think both are just testing PTE-mapped THPs, feel free to replace 
>> the
>> existing one with the mprotect test case. In addition, is_backed_by_folio()
>> can be reverted back to its prior version, since it no longer needs to handle
>> the case where subpages from different THPs can be mapped into a vaddr range.
>
> Oh, the is_backed_by_folio() change is actually really valuable.
>
>
> I think I was confused by the implementation that works on a single virtual 
> address
> range with multiple different variables, questioning why we mremap at all.
>
> I tried cleaning up that test myself and ended up with the following (it
> escalated a bit). If that looks cleaner to you as well, I can submit that as a
> patch.
>
> diff --git a/tools/testing/selftests/mm/split_huge_page_test.c 
> b/tools/testing/selftests/mm/split_huge_page_test.c
> index 10ae65ea032f6..aa0f0502efa06 100644
> --- a/tools/testing/selftests/mm/split_huge_page_test.c
> +++ b/tools/testing/selftests/mm/split_huge_page_test.c
> @@ -390,67 +390,88 @@ static void split_pmd_thp_to_order(int order)
>   static void split_pte_mapped_thp(void)
>  {
> -     char *one_page, *pte_mapped, *pte_mapped2;
> -     size_t len = 4 * pmd_pagesize;
> -     uint64_t thp_size;
> +     const size_t nr_thps = 4;
> +     const size_t thp_area_size = nr_thps * pmd_pagesize;
> +     const size_t page_area_size = nr_thps * pagesize;
> +     char *thp_area, *page_area = NULL, *tmp;
>       size_t i;
>  -    one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE,
> +     thp_area = mmap((void *)(1UL << 30), thp_area_size, PROT_READ | 
> PROT_WRITE,
>                       MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> -     if (one_page == MAP_FAILED)
> -             ksft_exit_fail_msg("Fail to allocate memory: %s\n", 
> strerror(errno));
> +     if (thp_area == MAP_FAILED) {
> +             ksft_test_result_fail("Fail to allocate memory: %s\n", 
> strerror(errno));
> +             goto out;

thp_area mmap failed and out label will try to munmap MAP_FAILED, which is
(void *) -1. munmap will fail with -EINVAL.

> +     }
>  -    madvise(one_page, len, MADV_HUGEPAGE);
> +     madvise(thp_area, thp_area_size, MADV_HUGEPAGE);
>  -    for (i = 0; i < len; i++)
> -             one_page[i] = (char)i;
> +     for (i = 0; i < thp_area_size; i++)
> +             thp_area[i] = (char)i;
>  -    if (!check_huge_anon(one_page, 4, pmd_pagesize))
> -             ksft_exit_fail_msg("No THP is allocated\n");
> +     if (!check_huge_anon(thp_area, nr_thps, pmd_pagesize)) {
> +             ksft_test_result_skip("Not all THPs allocated\n");
> +             goto out;
> +     }
>  -    /* remap the first pagesize of first THP */
> -     pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE);
> -
> -     /* remap the Nth pagesize of Nth THP */
> -     for (i = 1; i < 4; i++) {
> -             pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i,
> -                                  pagesize, pagesize,
> -                                  MREMAP_MAYMOVE|MREMAP_FIXED,
> -                                  pte_mapped + pagesize * i);
> -             if (pte_mapped2 == MAP_FAILED)
> -                     ksft_exit_fail_msg("mremap failed: %s\n", 
> strerror(errno));
> -     }
> -
> -     /* smap does not show THPs after mremap, use kpageflags instead */
> -     thp_size = 0;
> -     for (i = 0; i < pagesize * 4; i++)
> -             if (i % pagesize == 0 &&
> -                 is_backed_by_folio(&pte_mapped[i], pmd_order, pagemap_fd, 
> kpageflags_fd))
> -                     thp_size++;
> -
> -     if (thp_size != 4)
> -             ksft_exit_fail_msg("Some THPs are missing during mremap\n");
> -
> -     /* split all remapped THPs */
> -     write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped,
> -                   (uint64_t)pte_mapped + pagesize * 4, 0);
> -
> -     /* smap does not show THPs after mremap, use kpageflags instead */
> -     thp_size = 0;
> -     for (i = 0; i < pagesize * 4; i++) {
> -             if (pte_mapped[i] != (char)i)
> -                     ksft_exit_fail_msg("%ld byte corrupted\n", i);
> +     /*
> +      * To challenge spitting code, we will mremap page[x] of the
> +      * thp[x] into a smaller area, and trigger the split from that
> +      * smaller area. This will end up replacing the PMD mappings in
> +      * the thp_area by PTE mappings first, leaving the THPs unsplit.
> +      */
> +     page_area = mmap(NULL, page_area_size, PROT_READ | PROT_WRITE,
> +                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +     if (page_area == MAP_FAILED) {
> +             ksft_test_result_fail("Fail to allocate memory: %s\n", 
> strerror(errno));
> +             goto out;
> +     }
>  -            if (i % pagesize == 0 &&
> -                 !is_backed_by_folio(&pte_mapped[i], 0, pagemap_fd, 
> kpageflags_fd))
> -                     thp_size++;
> +     for (i = 0; i < nr_thps; i++) {
> +             tmp = mremap(thp_area + pmd_pagesize * i + pagesize * i,
> +                          pagesize, pagesize, MREMAP_MAYMOVE|MREMAP_FIXED,
> +                          page_area + pagesize * i);
> +             if (tmp != MAP_FAILED)
> +                     continue;
> +             ksft_test_result_fail("mremap failed: %s\n", strerror(errno));
> +             goto out;
> +     }
> +
> +     /*
> +      * Verify that our THPs were not split yet. Note that
> +      * check_huge_anon() cannot be used as it checks for PMD mappings.
> +      */
> +     for (i = 0; i < nr_thps; i++) {
> +             if (is_backed_by_folio(page_area + i * pagesize, pmd_order,
> +                                    pagemap_fd, kpageflags_fd))
> +                     continue;
> +             ksft_test_result_fail("THP %zu missing after mremap\n", i);
> +             goto out;
>       }
>  -    if (thp_size)
> -             ksft_exit_fail_msg("Still %ld THPs not split\n", thp_size);
> +     /* Split all THPs through the remapped pages. */
> +     write_debugfs(PID_FMT, getpid(), (uint64_t)page_area,
> +                   (uint64_t)page_area + page_area_size, 0);
> +
> +     /* Corruption during mremap or split? */
> +     for (i = 0; i < page_area_size; i++) {
> +             if (page_area[i] == (char)i)
> +                     continue;
> +             ksft_test_result_fail("%zu byte corrupted\n", i);
> +             goto out;
> +     }
> +
> +     /* Split failed? */
> +     for (i = 0; i < nr_thps; i++) {
> +             if (is_backed_by_folio(&page_area[i], 0, pagemap_fd, 
> kpageflags_fd))
                        
page_area + i * pagesize, like Wei pointed out in another email.

> +                     continue;
> +             ksft_test_result_fail("THP %zu not split\n", i);
> +     }
>       ksft_test_result_pass("Split PTE-mapped huge pages successful\n");
> -     munmap(one_page, len);
> +out:
> +     munmap(thp_area, thp_area_size);
> +     if (page_area)
> +             munmap(page_area, page_area_size);
>  }
>   static void split_file_backed_thp(int order)
> -- 
> 2.50.1

Otherwise, LGTM. With all the changes in this email and other email,
feel free to add Reviewed-by: Zi Yan <z...@nvidia.com> when you send it
out formally.

Thank you for cleaning this up.

Best Regards,
Yan, Zi

Reply via email to