On 1 Sep 2025, at 15:10, David Hildenbrand wrote:

>>>>> (a) Will this actually do anything? Also, maybe it does now, but can't
>>>>> the kernel just optimize that out in the future?
>>>>
>>>> It remaps each subpage of 4 PMD THPs into a contiguous 2MB vaddr range and
>>>> performs a split on that range.
>>>
>>> I'm afraid I am missing the "why".
>>>
>>> I would have thought that a "split_pte_mapped_thp" test would want to
>>> pte-map THPs to see if they can be split.
>>>
>>> Why is the mremap required? IOW, what exactly is the test trying to test
>>> that exceeds "split_pte_mapped_thp"?
>>
>> IMHO, it is an interesting test case for splitting a THP when only a subpage
>> is mapped into a vaddr range and, within a contiguous vaddr range, each page
>> comes from a different THP.
>
> Right. Slightly similar to just MADV_DONTNEED'ing the other PTEs and trying
> to split the bigger range.
>
> Of course, if you involve more mremap, the RMAP logic of installing migration
> ptes will get stressed more.
>
> So yes, there are various ways to stress the RMAP walk when splitting.
>
>> The mprotect test case you are mentioning would still have all
>> subpages mapped by contiguous vaddrs.
>
> Right, it would not stress RMAP as much.
>
>> But if you think both are just testing PTE-mapped THPs, feel free to replace
>> the existing one with the mprotect test case. In addition, is_backed_by_folio()
>> can be reverted back to its prior version, since it no longer needs to handle
>> the case where subpages from different THPs can be mapped into a vaddr range.
>
> Oh, the is_backed_by_folio() change is actually really valuable.
>
> I think I was confused by the implementation that works on a single virtual
> address range with multiple different variables, questioning why we mremap
> at all.
>
> I tried cleaning up that test myself and ended up with the following (it
> escalated a bit). If that looks cleaner to you as well, I can submit that as
> a patch.
>
> diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
> index 10ae65ea032f6..aa0f0502efa06 100644
> --- a/tools/testing/selftests/mm/split_huge_page_test.c
> +++ b/tools/testing/selftests/mm/split_huge_page_test.c
> @@ -390,67 +390,88 @@ static void split_pmd_thp_to_order(int order)
>
>  static void split_pte_mapped_thp(void)
>  {
> -        char *one_page, *pte_mapped, *pte_mapped2;
> -        size_t len = 4 * pmd_pagesize;
> -        uint64_t thp_size;
> +        const size_t nr_thps = 4;
> +        const size_t thp_area_size = nr_thps * pmd_pagesize;
> +        const size_t page_area_size = nr_thps * pagesize;
> +        char *thp_area, *page_area = NULL, *tmp;
>          size_t i;
>
> -        one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE,
> +        thp_area = mmap((void *)(1UL << 30), thp_area_size, PROT_READ | PROT_WRITE,
>                          MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> -        if (one_page == MAP_FAILED)
> -                ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno));
> +        if (thp_area == MAP_FAILED) {
> +                ksft_test_result_fail("Fail to allocate memory: %s\n", strerror(errno));
> +                goto out;

If the thp_area mmap() fails here, the out label will try to munmap()
MAP_FAILED, which is (void *)-1, so that munmap() will fail with -EINVAL.
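Not a blocker, but something like the following untested sketch (reusing your
variable names, on top of your patch) would only unmap what was actually
mapped; note page_area would have the same problem if its mmap() failed, since
MAP_FAILED is non-NULL:

out:
        /* Only unmap areas whose mmap() actually succeeded. */
        if (thp_area != MAP_FAILED)
                munmap(thp_area, thp_area_size);
        if (page_area && page_area != MAP_FAILED)
                munmap(page_area, page_area_size);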
> +        }
>
> -        madvise(one_page, len, MADV_HUGEPAGE);
> +        madvise(thp_area, thp_area_size, MADV_HUGEPAGE);
>
> -        for (i = 0; i < len; i++)
> -                one_page[i] = (char)i;
> +        for (i = 0; i < thp_area_size; i++)
> +                thp_area[i] = (char)i;
>
> -        if (!check_huge_anon(one_page, 4, pmd_pagesize))
> -                ksft_exit_fail_msg("No THP is allocated\n");
> +        if (!check_huge_anon(thp_area, nr_thps, pmd_pagesize)) {
> +                ksft_test_result_skip("Not all THPs allocated\n");
> +                goto out;
> +        }
>
> -        /* remap the first pagesize of first THP */
> -        pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE);
> -
> -        /* remap the Nth pagesize of Nth THP */
> -        for (i = 1; i < 4; i++) {
> -                pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i,
> -                                     pagesize, pagesize,
> -                                     MREMAP_MAYMOVE|MREMAP_FIXED,
> -                                     pte_mapped + pagesize * i);
> -                if (pte_mapped2 == MAP_FAILED)
> -                        ksft_exit_fail_msg("mremap failed: %s\n", strerror(errno));
> -        }
> -
> -        /* smap does not show THPs after mremap, use kpageflags instead */
> -        thp_size = 0;
> -        for (i = 0; i < pagesize * 4; i++)
> -                if (i % pagesize == 0 &&
> -                    is_backed_by_folio(&pte_mapped[i], pmd_order, pagemap_fd, kpageflags_fd))
> -                        thp_size++;
> -
> -        if (thp_size != 4)
> -                ksft_exit_fail_msg("Some THPs are missing during mremap\n");
> -
> -        /* split all remapped THPs */
> -        write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped,
> -                      (uint64_t)pte_mapped + pagesize * 4, 0);
> -
> -        /* smap does not show THPs after mremap, use kpageflags instead */
> -        thp_size = 0;
> -        for (i = 0; i < pagesize * 4; i++) {
> -                if (pte_mapped[i] != (char)i)
> -                        ksft_exit_fail_msg("%ld byte corrupted\n", i);
> +        /*
> +         * To challenge splitting code, we will mremap page[x] of the
> +         * thp[x] into a smaller area, and trigger the split from that
> +         * smaller area. This will end up replacing the PMD mappings in
> +         * the thp_area by PTE mappings first, leaving the THPs unsplit.
> +         */
> +        page_area = mmap(NULL, page_area_size, PROT_READ | PROT_WRITE,
> +                         MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +        if (page_area == MAP_FAILED) {
> +                ksft_test_result_fail("Fail to allocate memory: %s\n", strerror(errno));
> +                goto out;
> +        }
>
> -                if (i % pagesize == 0 &&
> -                    !is_backed_by_folio(&pte_mapped[i], 0, pagemap_fd, kpageflags_fd))
> -                        thp_size++;
> +        for (i = 0; i < nr_thps; i++) {
> +                tmp = mremap(thp_area + pmd_pagesize * i + pagesize * i,
> +                             pagesize, pagesize, MREMAP_MAYMOVE|MREMAP_FIXED,
> +                             page_area + pagesize * i);
> +                if (tmp != MAP_FAILED)
> +                        continue;
> +                ksft_test_result_fail("mremap failed: %s\n", strerror(errno));
> +                goto out;
> +        }
> +
> +        /*
> +         * Verify that our THPs were not split yet. Note that
> +         * check_huge_anon() cannot be used as it checks for PMD mappings.
> +         */
> +        for (i = 0; i < nr_thps; i++) {
> +                if (is_backed_by_folio(page_area + i * pagesize, pmd_order,
> +                                       pagemap_fd, kpageflags_fd))
> +                        continue;
> +                ksft_test_result_fail("THP %zu missing after mremap\n", i);
> +                goto out;
>          }
>
> -        if (thp_size)
> -                ksft_exit_fail_msg("Still %ld THPs not split\n", thp_size);
> +        /* Split all THPs through the remapped pages. */
> +        write_debugfs(PID_FMT, getpid(), (uint64_t)page_area,
> +                      (uint64_t)page_area + page_area_size, 0);
> +
> +        /* Corruption during mremap or split? */
> +        for (i = 0; i < page_area_size; i++) {
> +                if (page_area[i] == (char)i)
> +                        continue;
> +                ksft_test_result_fail("%zu byte corrupted\n", i);
> +                goto out;
> +        }
> +
> +        /* Split failed? */
> +        for (i = 0; i < nr_thps; i++) {
> +                if (is_backed_by_folio(&page_area[i], 0, pagemap_fd, kpageflags_fd))

This should be page_area + i * pagesize, like Wei pointed out in another email.
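I.e., presumably (untested):

                if (is_backed_by_folio(page_area + i * pagesize, 0,
                                       pagemap_fd, kpageflags_fd))

Otherwise &page_area[i] only advances one byte per iteration and never leaves
the first page.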
> +                        continue;
> +                ksft_test_result_fail("THP %zu not split\n", i);
> +        }
>
>          ksft_test_result_pass("Split PTE-mapped huge pages successful\n");
>
> -        munmap(one_page, len);
> +out:
> +        munmap(thp_area, thp_area_size);
> +        if (page_area)
> +                munmap(page_area, page_area_size);
>  }
>
>  static void split_file_backed_thp(int order)
> --
> 2.50.1

Otherwise, LGTM. With all the changes in this email and the other email, feel
free to add Reviewed-by: Zi Yan <z...@nvidia.com> when you send it out
formally.

Thank you for cleaning this up.

Best Regards,
Yan, Zi