From: Shivansh Dhiman <shivansh.dhi...@amd.com>

Add NUMA mempolicy support to the filemap allocation path by introducing
new APIs that take a mempolicy argument:
- filemap_grab_folio_mpol()
- filemap_alloc_folio_mpol()
- __filemap_get_folio_mpol()
These APIs allow callers to specify a NUMA policy during page cache
allocations, enabling fine-grained control over memory placement. This is
particularly needed by KVM when using guest-memfd memory backends, where
guest memory must be allocated according to the NUMA policy specified by
the VMM.

The existing non-mempolicy APIs remain unchanged and continue to use the
default allocation behavior.

Signed-off-by: Shivansh Dhiman <shivansh.dhi...@amd.com>
Signed-off-by: Shivank Garg <shiva...@amd.com>
---
 include/linux/pagemap.h | 41 +++++++++++++++++++++++++++++++++++++++++
 mm/filemap.c            | 27 +++++++++++++++++++++++----
 2 files changed, 64 insertions(+), 4 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 26baa78f1ca7..bc5231626557 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -637,15 +637,24 @@ static inline void *detach_page_private(struct page *page)
 
 #ifdef CONFIG_NUMA
 struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order);
+struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp, unsigned int order,
+		struct mempolicy *mpol, pgoff_t ilx);
 #else
 static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
 {
 	return folio_alloc_noprof(gfp, order);
 }
+static inline struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp,
+		unsigned int order, struct mempolicy *mpol, pgoff_t ilx)
+{
+	return filemap_alloc_folio_noprof(gfp, order);
+}
 #endif
 
 #define filemap_alloc_folio(...)				\
 	alloc_hooks(filemap_alloc_folio_noprof(__VA_ARGS__))
+#define filemap_alloc_folio_mpol(...)				\
+	alloc_hooks(filemap_alloc_folio_mpol_noprof(__VA_ARGS__))
 
 static inline struct page *__page_cache_alloc(gfp_t gfp)
 {
@@ -737,6 +746,8 @@ static inline fgf_t fgf_set_order(size_t size)
 
 void *filemap_get_entry(struct address_space *mapping, pgoff_t index);
 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 		fgf_t fgp_flags, gfp_t gfp);
+struct folio *__filemap_get_folio_mpol(struct address_space *mapping,
+		pgoff_t index, fgf_t fgp_flags, gfp_t gfp, struct mempolicy *mpol, pgoff_t ilx);
 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
 		fgf_t fgp_flags, gfp_t gfp);
@@ -795,6 +806,36 @@ static inline struct folio *filemap_grab_folio(struct address_space *mapping,
 			mapping_gfp_mask(mapping));
 }
 
+/**
+ * filemap_grab_folio_mpol - grab a folio from the page cache.
+ * @mapping: The address space to search.
+ * @index: The page index.
+ * @mpol: The mempolicy to apply when allocating a new folio.
+ * @ilx: The interleave index, for use only with MPOL_INTERLEAVE or
+ *       MPOL_WEIGHTED_INTERLEAVE.
+ *
+ * Same as filemap_grab_folio(), except that it allocates the folio using
+ * the given memory policy.
+ *
+ * Return: A found or created folio. ERR_PTR(-ENOMEM) if no folio is found
+ * and a folio could not be created.
+ */
+#ifdef CONFIG_NUMA
+static inline struct folio *filemap_grab_folio_mpol(struct address_space *mapping,
+		pgoff_t index, struct mempolicy *mpol, pgoff_t ilx)
+{
+	return __filemap_get_folio_mpol(mapping, index,
+			FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+			mapping_gfp_mask(mapping), mpol, ilx);
+}
+#else
+static inline struct folio *filemap_grab_folio_mpol(struct address_space *mapping,
+		pgoff_t index, struct mempolicy *mpol, pgoff_t ilx)
+{
+	return filemap_grab_folio(mapping, index);
+}
+#endif /* CONFIG_NUMA */
+
 /**
  * find_get_page - find and get a page reference
  * @mapping: the address_space to search
diff --git a/mm/filemap.c b/mm/filemap.c
index b5e784f34d98..7b06ee4b4d63 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1007,6 +1007,15 @@ struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
 	return folio_alloc_noprof(gfp, order);
 }
 EXPORT_SYMBOL(filemap_alloc_folio_noprof);
+
+struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp, unsigned int order,
+		struct mempolicy *mpol, pgoff_t ilx)
+{
+	if (mpol)
+		return folio_alloc_mpol_noprof(gfp, order, mpol,
+				ilx, numa_node_id());
+	return filemap_alloc_folio_noprof(gfp, order);
+}
 #endif
 
 /*
@@ -1880,11 +1889,14 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
 }
 
 /**
- * __filemap_get_folio - Find and get a reference to a folio.
+ * __filemap_get_folio_mpol - Find and get a reference to a folio.
  * @mapping: The address_space to search.
  * @index: The page index.
  * @fgp_flags: %FGP flags modify how the folio is returned.
  * @gfp: Memory allocation flags to use if %FGP_CREAT is specified.
+ * @mpol: The mempolicy to apply when allocating a new folio.
+ * @ilx: The interleave index, for use only with MPOL_INTERLEAVE or
+ *       MPOL_WEIGHTED_INTERLEAVE.
  *
  * Looks up the page cache entry at @mapping & @index.
  *
@@ -1895,8 +1907,8 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
  *
  * Return: The found folio or an ERR_PTR() otherwise.
  */
-struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
-		fgf_t fgp_flags, gfp_t gfp)
+struct folio *__filemap_get_folio_mpol(struct address_space *mapping, pgoff_t index,
+		fgf_t fgp_flags, gfp_t gfp, struct mempolicy *mpol, pgoff_t ilx)
 {
 	struct folio *folio;
 
@@ -1966,7 +1978,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 			err = -ENOMEM;
 			if (order > min_order)
 				alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
-			folio = filemap_alloc_folio(alloc_gfp, order);
+			folio = filemap_alloc_folio_mpol(alloc_gfp, order, mpol, ilx);
 			if (!folio)
 				continue;
 
@@ -2013,6 +2025,13 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 		folio_clear_dropbehind(folio);
 	return folio;
 }
+EXPORT_SYMBOL(__filemap_get_folio_mpol);
+
+struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
+		fgf_t fgp_flags, gfp_t gfp)
+{
+	return __filemap_get_folio_mpol(mapping, index, fgp_flags, gfp, NULL, 0);
+}
 EXPORT_SYMBOL(__filemap_get_folio);
 
 static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,
-- 
2.34.1
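
For illustration only (not part of the patch): a guest-memfd-style backend
that keeps a shmem-like per-file shared_policy could combine the existing
mpol_shared_policy_lookup()/mpol_cond_put() helpers with the new API roughly
as below. example_grab_folio() and its parameters are hypothetical names,
and passing @index as the interleave index is an assumption of this sketch.

#include <linux/pagemap.h>
#include <linux/mempolicy.h>

static struct folio *example_grab_folio(struct address_space *mapping,
					struct shared_policy *sp,
					pgoff_t index)
{
	struct mempolicy *mpol;
	struct folio *folio;

	/* Look up the policy covering @index; may be NULL. */
	mpol = mpol_shared_policy_lookup(sp, index);

	/*
	 * Find or create the folio, locked, under @mpol.  A NULL @mpol
	 * falls back to the default behaviour of filemap_grab_folio().
	 * The interleave index is only consulted for (weighted)
	 * interleave policies.
	 */
	folio = filemap_grab_folio_mpol(mapping, index, mpol, index);

	mpol_cond_put(mpol);
	return folio;	/* a locked folio, or ERR_PTR(-ENOMEM) */
}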