| Message ID | 20250210063227.41125-2-shivankg@amd.com (mailing list archive) |
|---|---|
| State | New |
| Series | Add NUMA mempolicy support for KVM guest-memfd |
On 10.02.25 07:32, Shivank Garg wrote:
> From: Shivansh Dhiman <shivansh.dhiman@amd.com>
>
> Add NUMA mempolicy support to the filemap allocation path by introducing
> new APIs that take a mempolicy argument:
> - filemap_grab_folio_mpol()
> - filemap_alloc_folio_mpol()
> - __filemap_get_folio_mpol()
>
> These APIs allow callers to specify a NUMA policy during page cache
> allocations, enabling fine-grained control over memory placement. This is
> particularly needed by KVM when using guest-memfd memory backends, where
> the guest memory needs to be allocated according to the NUMA policy
> specified by VMM.
>

shmem handles this using custom shmem_alloc_folio()->folio_alloc_mpol().

I'm curious, is there

(1) A way to make shmem also use this new API?
(2) Handle it in guest_memfd manually, like shmem does?

> The existing non-mempolicy APIs remain unchanged and continue to use the
> default allocation behavior.
>
> Signed-off-by: Shivansh Dhiman <shivansh.dhiman@amd.com>
> Signed-off-by: Shivank Garg <shivankg@amd.com>
> ---
>  include/linux/pagemap.h | 40 ++++++++++++++++++++++++++++++++++++++++
>  mm/filemap.c            | 30 +++++++++++++++++++++++++-----
>  2 files changed, 65 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index 47bfc6b1b632..4ae7fa63cb26 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -662,15 +662,25 @@ static inline void *detach_page_private(struct page *page)
>
>  #ifdef CONFIG_NUMA
>  struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order);
> +struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp, unsigned int order,
> +	struct mempolicy *mpol);

Two tabs indent on second parameter line, please.

>  #else
>  static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
>  {
>  	return folio_alloc_noprof(gfp, order);
>  }
> +static inline struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp,
> +	unsigned int order,
> +	struct mempolicy *mpol)
> +{
> +	return filemap_alloc_folio_noprof(gfp, order);
> +}

Ditto.

>  #endif
>
>  #define filemap_alloc_folio(...)				\
>  	alloc_hooks(filemap_alloc_folio_noprof(__VA_ARGS__))
> +#define filemap_alloc_folio_mpol(...)				\
> +	alloc_hooks(filemap_alloc_folio_mpol_noprof(__VA_ARGS__))
>
>  static inline struct page *__page_cache_alloc(gfp_t gfp)
>  {
> @@ -762,6 +772,8 @@ static inline fgf_t fgf_set_order(size_t size)
>  void *filemap_get_entry(struct address_space *mapping, pgoff_t index);
>  struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
>  		fgf_t fgp_flags, gfp_t gfp);
> +struct folio *__filemap_get_folio_mpol(struct address_space *mapping,
> +		pgoff_t index, fgf_t fgp_flags, gfp_t gfp, struct mempolicy *mpol);
>  struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
>  		fgf_t fgp_flags, gfp_t gfp);
>
> @@ -820,6 +832,34 @@ static inline struct folio *filemap_grab_folio(struct address_space *mapping,
>  			mapping_gfp_mask(mapping));
>  }
>
> +/**
> + * filemap_grab_folio_mpol - grab a folio from the page cache
> + * @mapping: The address space to search
> + * @index: The page index
> + * @mpol: The mempolicy to apply

"The mempolicy to apply when allocating a new folio." ?

> + *
> + * Same as filemap_grab_folio(), except that it allocates the folio using
> + * given memory policy.
> + *
> + * Return: A found or created folio. ERR_PTR(-ENOMEM) if no folio is found
> + * and failed to create a folio.
> + */
> +#ifdef CONFIG_NUMA
> +static inline struct folio *filemap_grab_folio_mpol(struct address_space *mapping,
> +		pgoff_t index, struct mempolicy *mpol)
> +{
> +	return __filemap_get_folio_mpol(mapping, index,
> +			FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
> +			mapping_gfp_mask(mapping), mpol);
> +}
> +#else
> +static inline struct folio *filemap_grab_folio_mpol(struct address_space *mapping,
> +		pgoff_t index, struct mempolicy *mpol)
> +{
> +	return filemap_grab_folio(mapping, index);
> +}
> +#endif /* CONFIG_NUMA */
> +
>  /**
>   * find_get_page - find and get a page reference
>   * @mapping: the address_space to search
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 804d7365680c..c5ea32702774 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1001,8 +1001,13 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio,
>  EXPORT_SYMBOL_GPL(filemap_add_folio);
>
>  #ifdef CONFIG_NUMA
> -struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
> +struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp, unsigned int order,
> +	struct mempolicy *mpol)
>  {
> +	if (mpol)
> +		return folio_alloc_mpol_noprof(gfp, order, mpol,
> +				NO_INTERLEAVE_INDEX, numa_node_id());
> +

This should go below the variable declaration. (and indentation on second
parameter line should align with the first parameter)

>  	int n;
>  	struct folio *folio;
>
> @@ -1018,6 +1023,12 @@ struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
>  	}
>  	return folio_alloc_noprof(gfp, order);
>  }
> +EXPORT_SYMBOL(filemap_alloc_folio_mpol_noprof);
> +
> +struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
> +{
> +	return filemap_alloc_folio_mpol_noprof(gfp, order, NULL);
> +}
>  EXPORT_SYMBOL(filemap_alloc_folio_noprof);
>  #endif
>
> @@ -1881,11 +1892,12 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
>  }
>
>  /**
> - * __filemap_get_folio - Find and get a reference to a folio.
> + * __filemap_get_folio_mpol - Find and get a reference to a folio.
>   * @mapping: The address_space to search.
>   * @index: The page index.
>   * @fgp_flags: %FGP flags modify how the folio is returned.
>   * @gfp: Memory allocation flags to use if %FGP_CREAT is specified.
> + * @mpol: The mempolicy to apply.

"The mempolicy to apply when allocating a new folio." ?

>   *
>   * Looks up the page cache entry at @mapping & @index.
>   *
> @@ -1896,8 +1908,8 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
>   *
>   * Return: The found folio or an ERR_PTR() otherwise.
>   */
> -struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> -		fgf_t fgp_flags, gfp_t gfp)
> +struct folio *__filemap_get_folio_mpol(struct address_space *mapping, pgoff_t index,
> +		fgf_t fgp_flags, gfp_t gfp, struct mempolicy *mpol)
>  {
>  	struct folio *folio;
>
> @@ -1967,7 +1979,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
>  			err = -ENOMEM;
>  			if (order > min_order)
>  				alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
> -			folio = filemap_alloc_folio(alloc_gfp, order);
> +			folio = filemap_alloc_folio_mpol(alloc_gfp, order, mpol);
>  			if (!folio)
>  				continue;
>
> @@ -2003,6 +2015,14 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
>  	folio_clear_dropbehind(folio);
>  	return folio;
>  }
> +EXPORT_SYMBOL(__filemap_get_folio_mpol);
> +
> +struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> +		fgf_t fgp_flags, gfp_t gfp)
> +{
> +	return __filemap_get_folio_mpol(mapping, index,
> +			fgp_flags, gfp, NULL);
> +}
>  EXPORT_SYMBOL(__filemap_get_folio);
>
>  static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,

For guest_memfd, where pages are un-movable and un-swappable, the memory
policy will never change later.

shmem seems to handle the swap-in case, because it takes care of allocating
pages in that case itself.

For ordinary pagecache pages (movable), page migration would likely not be
aware of the specified mpol; I assume the same applies to shmem?

alloc_migration_target() seems to prefer the current nid (nid =
folio_nid(src)), but apart from that, does not look up any mempolicy.

compaction likely handles this by compacting within a node/zone.

Maybe migration to the right target node on misplacement is handled on a
higher level later (numa hinting faults -> migrate_misplaced_folio). Likely
at least for anon memory, not sure about unmapped shmem.
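The new helpers discussed above are meant to be consumed by callers such as
guest_memfd. As a rough, hypothetical sketch of such a caller (only
filemap_grab_folio_mpol() comes from the patch; the helper name and the way
the mempolicy reaches it are illustrative, not taken from this series):

/*
 * Hypothetical caller of the proposed API. The function itself is
 * illustrative only.
 */
static struct folio *gmem_get_folio(struct inode *inode, pgoff_t index,
				    struct mempolicy *mpol)
{
	/*
	 * Find the folio at @index in the page cache, or allocate and add it
	 * if missing; a new allocation honours @mpol instead of the task's
	 * default policy.
	 */
	return filemap_grab_folio_mpol(inode->i_mapping, index, mpol);
}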
Hi David,

Thanks for the review.

On 2/12/2025 3:33 PM, David Hildenbrand wrote:
> On 10.02.25 07:32, Shivank Garg wrote:
>> From: Shivansh Dhiman <shivansh.dhiman@amd.com>
>>
>> Add NUMA mempolicy support to the filemap allocation path by introducing
>> new APIs that take a mempolicy argument:
>> - filemap_grab_folio_mpol()
>> - filemap_alloc_folio_mpol()
>> - __filemap_get_folio_mpol()
>>
>> These APIs allow callers to specify a NUMA policy during page cache
>> allocations, enabling fine-grained control over memory placement. This is
>> particularly needed by KVM when using guest-memfd memory backends, where
>> the guest memory needs to be allocated according to the NUMA policy
>> specified by VMM.
>>
>
> shmem handles this using custom shmem_alloc_folio()->folio_alloc_mpol().
>
> I'm curious, is there
>
> (1) A way to make shmem also use this new API?
> (2) Handle it in guest_memfd manually, like shmem does?

(1) As you noted later, shmem has unique requirements due to handling swapin.
It does considerable open-coding.
Initially, I was considering simplifying shmem, but it was not possible due
to the above constraints.
One option would be to add shmem's special cases in the filemap and check for
them using shmem_mapping()?
But I don't understand the shmem internals well enough to determine if it is
feasible.

(2) I considered handling it manually in guest_memfd like shmem does, but this
would lead to code duplication and more open-coding in guest_memfd. The current
approach seems cleaner.

> Two tabs indent on second parameter line, please.
>
..
>
> This should go below the variable declaration. (and indentation on second
> parameter line should align with the first parameter)
>
..
> "The mempolicy to apply when allocating a new folio." ?
>

I'll address all the formatting and documentation issues in the next posting.

>
> For guest_memfd, where pages are un-movable and un-swappable, the memory
> policy will never change later.
>
> shmem seems to handle the swap-in case, because it takes care of allocating
> pages in that case itself.
>
> For ordinary pagecache pages (movable), page migration would likely not be
> aware of the specified mpol; I assume the same applies to shmem?
>
> alloc_migration_target() seems to prefer the current nid (nid =
> folio_nid(src)), but apart from that, does not look up any mempolicy.

Page migration does handle the NUMA mempolicy using mtc
(struct migration_target_control *), which takes a node ID as input and
allocates on the "preferred" node ID.
The target node in migrate_misplaced_folio() is obtained using
get_vma_policy(), so the per-VMA policy handles proper node placement for
mapped pages.
It uses the current nid (folio_nid(src)) only if NUMA_NO_NODE is passed.

mempolicy.c provides alloc_migration_target_by_mpol(), which allocates
according to the NUMA mempolicy and is used by do_mbind().

>
> compaction likely handles this by compacting within a node/zone.
>
> Maybe migration to the right target node on misplacement is handled on a
> higher level later (numa hinting faults -> migrate_misplaced_folio). Likely
> at least for anon memory, not sure about unmapped shmem.

Yes.

Thanks,
Shivank
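As a rough illustration of the migration path described above: callers hand
the target node to the migration core via struct migration_target_control (an
mm-internal structure), and alloc_migration_target() allocates on that node,
falling back to folio_nid(src) only for NUMA_NO_NODE. A sketch loosely
modelled on migrate_to_node() in mm/mempolicy.c; the struct fields and the
migrate_pages() signature vary between kernel versions:

/*
 * Sketch only: migrate the folios on @pagelist to @dest_nid. Assumes the
 * mm-internal struct migration_target_control is visible to the caller.
 */
static int migrate_list_to_node(struct list_head *pagelist, int dest_nid)
{
	struct migration_target_control mtc = {
		.nid = dest_nid,	/* "preferred" target node */
		.gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE,
	};

	/*
	 * alloc_migration_target() allocates on mtc.nid; it only falls back
	 * to folio_nid(src) when mtc.nid == NUMA_NO_NODE.
	 */
	return migrate_pages(pagelist, alloc_migration_target, NULL,
			     (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL,
			     NULL);
}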
>
> (1) As you noted later, shmem has unique requirements due to handling swapin.
> It does considerable open-coding.
> Initially, I was considering simplifying shmem, but it was not possible due
> to the above constraints.
> One option would be to add shmem's special cases in the filemap and check for
> them using shmem_mapping()?
> But I don't understand the shmem internals well enough to determine if it is
> feasible.
>

Okay, thanks for looking into this.

> (2) I considered handling it manually in guest_memfd like shmem does, but this
> would lead to code duplication and more open-coding in guest_memfd. The current
> approach seems cleaner.

Okay, thanks.

>
>> Two tabs indent on second parameter line, please.
>>
> ..
>>
>> This should go below the variable declaration. (and indentation on second
>> parameter line should align with the first parameter)
>>
> ..
>> "The mempolicy to apply when allocating a new folio." ?
>>
>
> I'll address all the formatting and documentation issues in the next posting.
>
>>
>> For guest_memfd, where pages are un-movable and un-swappable, the memory
>> policy will never change later.
>>
>> shmem seems to handle the swap-in case, because it takes care of allocating
>> pages in that case itself.
>>
>> For ordinary pagecache pages (movable), page migration would likely not be
>> aware of the specified mpol; I assume the same applies to shmem?
>>
>> alloc_migration_target() seems to prefer the current nid (nid =
>> folio_nid(src)), but apart from that, does not look up any mempolicy.
>
> Page migration does handle the NUMA mempolicy using mtc
> (struct migration_target_control *), which takes a node ID as input and
> allocates on the "preferred" node ID.
> The target node in migrate_misplaced_folio() is obtained using
> get_vma_policy(), so the per-VMA policy handles proper node placement for
> mapped pages.
> It uses the current nid (folio_nid(src)) only if NUMA_NO_NODE is passed.
>
> mempolicy.c provides alloc_migration_target_by_mpol(), which allocates
> according to the NUMA mempolicy and is used by do_mbind().
>
>>
>> compaction likely handles this by compacting within a node/zone.
>>
>> Maybe migration to the right target node on misplacement is handled on a
>> higher level later (numa hinting faults -> migrate_misplaced_folio). Likely
>> at least for anon memory, not sure about unmapped shmem.
>
> Yes.

Thanks, LGTM.
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 47bfc6b1b632..4ae7fa63cb26 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -662,15 +662,25 @@ static inline void *detach_page_private(struct page *page)
 
 #ifdef CONFIG_NUMA
 struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order);
+struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp, unsigned int order,
+	struct mempolicy *mpol);
 #else
 static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
 {
 	return folio_alloc_noprof(gfp, order);
 }
+static inline struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp,
+	unsigned int order,
+	struct mempolicy *mpol)
+{
+	return filemap_alloc_folio_noprof(gfp, order);
+}
 #endif
 
 #define filemap_alloc_folio(...)				\
 	alloc_hooks(filemap_alloc_folio_noprof(__VA_ARGS__))
+#define filemap_alloc_folio_mpol(...)				\
+	alloc_hooks(filemap_alloc_folio_mpol_noprof(__VA_ARGS__))
 
 static inline struct page *__page_cache_alloc(gfp_t gfp)
 {
@@ -762,6 +772,8 @@ static inline fgf_t fgf_set_order(size_t size)
 void *filemap_get_entry(struct address_space *mapping, pgoff_t index);
 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 		fgf_t fgp_flags, gfp_t gfp);
+struct folio *__filemap_get_folio_mpol(struct address_space *mapping,
+		pgoff_t index, fgf_t fgp_flags, gfp_t gfp, struct mempolicy *mpol);
 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
 		fgf_t fgp_flags, gfp_t gfp);
 
@@ -820,6 +832,34 @@ static inline struct folio *filemap_grab_folio(struct address_space *mapping,
 			mapping_gfp_mask(mapping));
 }
 
+/**
+ * filemap_grab_folio_mpol - grab a folio from the page cache
+ * @mapping: The address space to search
+ * @index: The page index
+ * @mpol: The mempolicy to apply
+ *
+ * Same as filemap_grab_folio(), except that it allocates the folio using
+ * given memory policy.
+ *
+ * Return: A found or created folio. ERR_PTR(-ENOMEM) if no folio is found
+ * and failed to create a folio.
+ */
+#ifdef CONFIG_NUMA
+static inline struct folio *filemap_grab_folio_mpol(struct address_space *mapping,
+		pgoff_t index, struct mempolicy *mpol)
+{
+	return __filemap_get_folio_mpol(mapping, index,
+			FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+			mapping_gfp_mask(mapping), mpol);
+}
+#else
+static inline struct folio *filemap_grab_folio_mpol(struct address_space *mapping,
+		pgoff_t index, struct mempolicy *mpol)
+{
+	return filemap_grab_folio(mapping, index);
+}
+#endif /* CONFIG_NUMA */
+
 /**
  * find_get_page - find and get a page reference
  * @mapping: the address_space to search
diff --git a/mm/filemap.c b/mm/filemap.c
index 804d7365680c..c5ea32702774 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1001,8 +1001,13 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio,
 EXPORT_SYMBOL_GPL(filemap_add_folio);
 
 #ifdef CONFIG_NUMA
-struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
+struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp, unsigned int order,
+	struct mempolicy *mpol)
 {
+	if (mpol)
+		return folio_alloc_mpol_noprof(gfp, order, mpol,
+				NO_INTERLEAVE_INDEX, numa_node_id());
+
 	int n;
 	struct folio *folio;
 
@@ -1018,6 +1023,12 @@ struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
 	}
 	return folio_alloc_noprof(gfp, order);
 }
+EXPORT_SYMBOL(filemap_alloc_folio_mpol_noprof);
+
+struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
+{
+	return filemap_alloc_folio_mpol_noprof(gfp, order, NULL);
+}
 EXPORT_SYMBOL(filemap_alloc_folio_noprof);
 #endif
 
@@ -1881,11 +1892,12 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
 }
 
 /**
- * __filemap_get_folio - Find and get a reference to a folio.
+ * __filemap_get_folio_mpol - Find and get a reference to a folio.
  * @mapping: The address_space to search.
  * @index: The page index.
  * @fgp_flags: %FGP flags modify how the folio is returned.
  * @gfp: Memory allocation flags to use if %FGP_CREAT is specified.
+ * @mpol: The mempolicy to apply.
  *
  * Looks up the page cache entry at @mapping & @index.
  *
@@ -1896,8 +1908,8 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
  *
  * Return: The found folio or an ERR_PTR() otherwise.
  */
-struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
-		fgf_t fgp_flags, gfp_t gfp)
+struct folio *__filemap_get_folio_mpol(struct address_space *mapping, pgoff_t index,
+		fgf_t fgp_flags, gfp_t gfp, struct mempolicy *mpol)
 {
 	struct folio *folio;
 
@@ -1967,7 +1979,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 			err = -ENOMEM;
 			if (order > min_order)
 				alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
-			folio = filemap_alloc_folio(alloc_gfp, order);
+			folio = filemap_alloc_folio_mpol(alloc_gfp, order, mpol);
 			if (!folio)
 				continue;
 
@@ -2003,6 +2015,14 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 	folio_clear_dropbehind(folio);
 	return folio;
 }
+EXPORT_SYMBOL(__filemap_get_folio_mpol);
+
+struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
+		fgf_t fgp_flags, gfp_t gfp)
+{
+	return __filemap_get_folio_mpol(mapping, index,
+			fgp_flags, gfp, NULL);
+}
 EXPORT_SYMBOL(__filemap_get_folio);
 
 static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,
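For completeness, a hypothetical sketch of how a filesystem that keeps a
per-inode shared policy (as shmem does, and as a guest_memfd backend could)
might feed it into the new __filemap_get_folio_mpol().
mpol_shared_policy_lookup() and mpol_cond_put() are existing mempolicy
helpers; the surrounding function and its arguments are illustrative only:

/* Illustrative caller, not part of this patch. */
static struct folio *example_get_folio(struct address_space *mapping,
				       struct shared_policy *sp, pgoff_t index)
{
	struct mempolicy *mpol;
	struct folio *folio;

	/* Look up the per-file policy for this index (may return NULL). */
	mpol = mpol_shared_policy_lookup(sp, index);

	/* A NULL mpol makes the new API fall back to the default behaviour. */
	folio = __filemap_get_folio_mpol(mapping, index,
			FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
			mapping_gfp_mask(mapping), mpol);

	/* Drops the reference taken by the lookup for shared policies. */
	mpol_cond_put(mpol);
	return folio;
}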