On Mon, Jun 08, 2026 at 04:39:37AM -0400, Michael S. Tsirkin wrote:
> Add bool *zeroed output to alloc_hugetlb_folio_reserve() so
> callers can check whether the pool page is known-zero.  memfd's
> memfd_alloc_folio() uses this to skip the explicit folio_zero_user()
> when the page is already zero.

But why does memfd do that?

This is more of the AI-ish 'write out in English what the code does' style,
which isn't really helpful.

>
> This avoids redundant zeroing for memfd hugetlb pages that were
> pre-allocated into the pool and never mapped to userspace.

I think this should lead the commit message, given it seems to be the whole
intent, no?

>
> Note: HPG_zeroed is currently only set for surplus pages
> allocated with __GFP_ZERO (via alloc_surplus_hugetlb_folio),
> not for pool pages from alloc_pool_huge_folio. So the
> zeroed output from alloc_hugetlb_folio_reserve is typically
> false for pool-only reservations. It becomes true when
> surplus pages fill the reservation. The addr_hint 0 passed
> to folio_zero_user is acceptable for memfd: these pages are
> not mapped yet and will get proper dcache handling at mmap
> time via the page fault path.

This paragraph is really hard to read, and you don't seem to propagate the
same very specific information into the code, so people maintaining it won't
know what's going on.

>
> Signed-off-by: Michael S. Tsirkin <[email protected]>
> Assisted-by: Claude:claude-opus-4-6

This is committing the sins of the rest and adding more complexity
throughout.

The whole approach needs a rework I think, but hugetlbfs stuff should be
deferred in general.

> ---
>  include/linux/cma.h     |  3 ++-
>  include/linux/hugetlb.h |  6 ++++--
>  mm/cma.c                |  6 ++++--
>  mm/hugetlb.c            | 11 +++++++++--
>  mm/hugetlb_cma.c        |  4 ++--
>  mm/memfd.c              | 14 ++++++++------
>  6 files changed, 29 insertions(+), 15 deletions(-)
>
> diff --git a/include/linux/cma.h b/include/linux/cma.h
> index 8555d38a97b1..dee88909cf5d 100644
> --- a/include/linux/cma.h
> +++ b/include/linux/cma.h
> @@ -53,7 +53,8 @@ extern bool cma_release(struct cma *cma, const struct page 
> *pages, unsigned long
>
>  struct page *cma_alloc_frozen(struct cma *cma, unsigned long count,
>               unsigned int align, bool no_warn);
> -struct page *cma_alloc_frozen_compound(struct cma *cma, unsigned int order);
> +struct page *cma_alloc_frozen_compound(struct cma *cma, unsigned int order,
> +                                    gfp_t caller_gfp);
>  bool cma_release_frozen(struct cma *cma, const struct page *pages,
>               unsigned long count);
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 06d033a57a61..7eb529eabe99 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -708,7 +708,8 @@ struct folio *alloc_hugetlb_folio_nodemask(struct hstate 
> *h, int preferred_nid,
>                               nodemask_t *nmask, gfp_t gfp_mask,
>                               bool allow_alloc_fallback);
>  struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int 
> preferred_nid,
> -                                       nodemask_t *nmask, gfp_t gfp_mask);
> +                                       nodemask_t *nmask, gfp_t gfp_mask,
> +                                       bool *zeroed);
>
>  int hugetlb_add_to_page_cache(struct folio *folio, struct address_space 
> *mapping,
>                       pgoff_t idx);
> @@ -1128,7 +1129,8 @@ static inline void wait_for_freed_hugetlb_folios(void)
>
>  static inline struct folio *
>  alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
> -                         nodemask_t *nmask, gfp_t gfp_mask)
> +                         nodemask_t *nmask, gfp_t gfp_mask,
> +                         bool *zeroed)
>  {
>       return NULL;
>  }
> diff --git a/mm/cma.c b/mm/cma.c
> index c7ca567f4c5c..27971f6264ab 100644
> --- a/mm/cma.c
> +++ b/mm/cma.c
> @@ -924,9 +924,11 @@ struct page *cma_alloc_frozen(struct cma *cma, unsigned 
> long count,
>       return __cma_alloc_frozen(cma, count, align, gfp);
>  }
>
> -struct page *cma_alloc_frozen_compound(struct cma *cma, unsigned int order)
> +struct page *cma_alloc_frozen_compound(struct cma *cma, unsigned int order,
> +                                    gfp_t caller_gfp)
>  {
> -     gfp_t gfp = GFP_KERNEL | __GFP_COMP | __GFP_NOWARN;
> +     gfp_t gfp = GFP_KERNEL | __GFP_COMP | __GFP_NOWARN |
> +                 (caller_gfp & __GFP_ZERO);
>
>       return __cma_alloc_frozen(cma, 1 << order, order, gfp);
>  }
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index ed00db703911..a087e915783f 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -2196,7 +2196,7 @@ struct folio 
> *alloc_buddy_hugetlb_folio_with_mpol(struct hstate *h,
>  }
>
>  struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int 
> preferred_nid,
> -             nodemask_t *nmask, gfp_t gfp_mask)
> +             nodemask_t *nmask, gfp_t gfp_mask, bool *zeroed)
>  {
>       struct folio *folio;
>
> @@ -2212,6 +2212,12 @@ struct folio *alloc_hugetlb_folio_reserve(struct 
> hstate *h, int preferred_nid,
>               h->resv_huge_pages--;
>
>       spin_unlock_irq(&hugetlb_lock);
> +
> +     if (zeroed && folio) {
> +             *zeroed = folio_test_hugetlb_zeroed(folio);
> +             folio_clear_hugetlb_zeroed(folio);
> +     }
> +
>       return folio;
>  }
>
> @@ -2296,7 +2302,8 @@ static int gather_surplus_pages(struct hstate *h, long 
> delta)
>                * It is okay to use NUMA_NO_NODE because we use numa_mem_id()
>                * down the road to pick the current node if that is the case.
>                */
> -             folio = alloc_surplus_hugetlb_folio(h, htlb_alloc_mask(h),
> +             folio = alloc_surplus_hugetlb_folio(h,
> +                                                 htlb_alloc_mask(h),
>                                                   NUMA_NO_NODE, 
> &alloc_nodemask,
>                                                   USER_ADDR_NONE);
>               if (!folio) {
> diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
> index 7693ccefd0c6..c9266b25be3d 100644
> --- a/mm/hugetlb_cma.c
> +++ b/mm/hugetlb_cma.c
> @@ -35,14 +35,14 @@ struct folio *hugetlb_cma_alloc_frozen_folio(int order, 
> gfp_t gfp_mask,
>               return NULL;
>
>       if (hugetlb_cma[nid])
> -             page = cma_alloc_frozen_compound(hugetlb_cma[nid], order);
> +             page = cma_alloc_frozen_compound(hugetlb_cma[nid], order, gfp_mask);
>
>       if (!page && !(gfp_mask & __GFP_THISNODE)) {
>               for_each_node_mask(node, *nodemask) {
>                       if (node == nid || !hugetlb_cma[node])
>                               continue;
>
> -                     page = cma_alloc_frozen_compound(hugetlb_cma[node], order);
> +                     page = cma_alloc_frozen_compound(hugetlb_cma[node], order, gfp_mask);
>                       if (page)
>                               break;
>               }
> diff --git a/mm/memfd.c b/mm/memfd.c
> index abe13b291ddc..a99617a62e33 100644
> --- a/mm/memfd.c
> +++ b/mm/memfd.c
> @@ -69,6 +69,7 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t 
> idx)
>  #ifdef CONFIG_HUGETLB_PAGE
>       struct folio *folio;
>       gfp_t gfp_mask;
> +     bool zeroed;
>
>       if (is_file_hugepages(memfd)) {
>               /*
> @@ -93,17 +94,18 @@ struct folio *memfd_alloc_folio(struct file *memfd, 
> pgoff_t idx)
>               folio = alloc_hugetlb_folio_reserve(h,
>                                                   numa_node_id(),
>                                                   NULL,
> -                                                 gfp_mask);
> +                                                 gfp_mask,
> +                                                 &zeroed);
>               if (folio) {
>                       u32 hash;
>
>                       /*
> -                      * Zero the folio to prevent information leaks to userspace.
> -                      * Use folio_zero_user() which is optimized for huge/gigantic
> -                      * pages. Pass 0 as addr_hint since this is not a faulting path
> -                      *  and we don't have a user virtual address yet.
> +                      * Zero the folio to prevent information leaks to
> +                      * userspace.  Skip if the pool page is known-zero
> +                      * (HPG_zeroed set during pool pre-allocation).
>                        */
> -                     folio_zero_user(folio, 0);
> +                     if (!zeroed)
> +                             folio_zero_user(folio, 0);
>
>                       /*
>                        * Mark the folio uptodate before adding to page cache,
> --
> MST
>

Thanks, Lorenzo

Reply via email to