Message ID | 20200429133657.22632-18-willy@infradead.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Large pages in the page cache | expand |
On Wed, Apr 29, 2020 at 6:37 AM Matthew Wilcox <willy@infradead.org> wrote: > > From: "Matthew Wilcox (Oracle)" <willy@infradead.org> > > This new function allows page cache pages to be allocated that are > larger than an order-0 page. > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> > --- > include/linux/pagemap.h | 24 +++++++++++++++++++++--- > mm/filemap.c | 12 ++++++++---- > 2 files changed, 29 insertions(+), 7 deletions(-) > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h > index 55199cb5bd66..1169e2428dd7 100644 > --- a/include/linux/pagemap.h > +++ b/include/linux/pagemap.h > @@ -205,15 +205,33 @@ static inline int page_cache_add_speculative(struct page *page, int count) > return __page_cache_add_speculative(page, count); > } > > +static inline gfp_t thp_gfpmask(gfp_t gfp) > +{ > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE > + /* We'd rather allocate smaller pages than stall a page fault */ > + gfp |= GFP_TRANSHUGE_LIGHT; This does not look correct. GFP_TRANSHUGE_LIGHT may set __GFP_FS, but some filesystems may expect GFP_NOFS, e.g. in the readahead path. 
> + gfp &= ~__GFP_DIRECT_RECLAIM; > +#endif > + return gfp; > +} > + > #ifdef CONFIG_NUMA > -extern struct page *__page_cache_alloc(gfp_t gfp); > +extern struct page *__page_cache_alloc_order(gfp_t gfp, unsigned int order); > #else > -static inline struct page *__page_cache_alloc(gfp_t gfp) > +static inline > +struct page *__page_cache_alloc_order(gfp_t gfp, unsigned int order) > { > - return alloc_pages(gfp, 0); > + if (order == 0) > + return alloc_pages(gfp, 0); > + return prep_transhuge_page(alloc_pages(thp_gfpmask(gfp), order)); > } > #endif > > +static inline struct page *__page_cache_alloc(gfp_t gfp) > +{ > + return __page_cache_alloc_order(gfp, 0); > +} > + > static inline struct page *page_cache_alloc(struct address_space *x) > { > return __page_cache_alloc(mapping_gfp_mask(x)); > diff --git a/mm/filemap.c b/mm/filemap.c > index 23a051a7ef0f..9abba062973a 100644 > --- a/mm/filemap.c > +++ b/mm/filemap.c > @@ -941,24 +941,28 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, > EXPORT_SYMBOL_GPL(add_to_page_cache_lru); > > #ifdef CONFIG_NUMA > -struct page *__page_cache_alloc(gfp_t gfp) > +struct page *__page_cache_alloc_order(gfp_t gfp, unsigned int order) > { > int n; > struct page *page; > > + if (order > 0) > + gfp = thp_gfpmask(gfp); > + > if (cpuset_do_page_mem_spread()) { > unsigned int cpuset_mems_cookie; > do { > cpuset_mems_cookie = read_mems_allowed_begin(); > n = cpuset_mem_spread_node(); > - page = __alloc_pages_node(n, gfp, 0); > + page = __alloc_pages_node(n, gfp, order); > + prep_transhuge_page(page); > } while (!page && read_mems_allowed_retry(cpuset_mems_cookie)); > > return page; > } > - return alloc_pages(gfp, 0); > + return prep_transhuge_page(alloc_pages(gfp, order)); > } > -EXPORT_SYMBOL(__page_cache_alloc); > +EXPORT_SYMBOL(__page_cache_alloc_order); > #endif > > /* > -- > 2.26.2 > >
On Wed, May 06, 2020 at 11:03:06AM -0700, Yang Shi wrote: > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h > > index 55199cb5bd66..1169e2428dd7 100644 > > --- a/include/linux/pagemap.h > > +++ b/include/linux/pagemap.h > > @@ -205,15 +205,33 @@ static inline int page_cache_add_speculative(struct page *page, int count) > > return __page_cache_add_speculative(page, count); > > } > > > > +static inline gfp_t thp_gfpmask(gfp_t gfp) > > +{ > > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE > > + /* We'd rather allocate smaller pages than stall a page fault */ > > + gfp |= GFP_TRANSHUGE_LIGHT; > > This does not look correct. GFP_TRANSHUGE_LIGHT may set __GFP_FS, but some > filesystems may expect GFP_NOFS, e.g. in the readahead path. Apologies, I overlooked this mail. In one of the prerequisite patches for this patch set (which is now merged as f2c817bed58d9be2051fad1d18e167e173c0c227), we call memalloc_nofs_save() in the readahead path. That ensures all allocations will have GFP_NOFS set by the time the page allocator sees them. Thanks for checking on this.
On Sat, Jun 6, 2020 at 8:08 PM Matthew Wilcox <willy@infradead.org> wrote: > > On Wed, May 06, 2020 at 11:03:06AM -0700, Yang Shi wrote: > > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h > > > index 55199cb5bd66..1169e2428dd7 100644 > > > --- a/include/linux/pagemap.h > > > +++ b/include/linux/pagemap.h > > > @@ -205,15 +205,33 @@ static inline int page_cache_add_speculative(struct page *page, int count) > > > return __page_cache_add_speculative(page, count); > > > } > > > > > > +static inline gfp_t thp_gfpmask(gfp_t gfp) > > > +{ > > > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE > > > + /* We'd rather allocate smaller pages than stall a page fault */ > > > + gfp |= GFP_TRANSHUGE_LIGHT; > > > > This does not look correct. GFP_TRANSHUGE_LIGHT may set __GFP_FS, but some > > filesystems may expect GFP_NOFS, e.g. in the readahead path. > > Apologies, I overlooked this mail. > > In one of the prerequisite patches for this patch set (which is now merged > as f2c817bed58d9be2051fad1d18e167e173c0c227), we call memalloc_nofs_save() > in the readahead path. That ensures all allocations will have GFP_NOFS > set by the time the page allocator sees them. > > Thanks for checking on this. Aha, yes, correct. I missed that. Thanks for finding that commit.
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 55199cb5bd66..1169e2428dd7 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -205,15 +205,33 @@ static inline int page_cache_add_speculative(struct page *page, int count) return __page_cache_add_speculative(page, count); } +static inline gfp_t thp_gfpmask(gfp_t gfp) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + /* We'd rather allocate smaller pages than stall a page fault */ + gfp |= GFP_TRANSHUGE_LIGHT; + gfp &= ~__GFP_DIRECT_RECLAIM; +#endif + return gfp; +} + #ifdef CONFIG_NUMA -extern struct page *__page_cache_alloc(gfp_t gfp); +extern struct page *__page_cache_alloc_order(gfp_t gfp, unsigned int order); #else -static inline struct page *__page_cache_alloc(gfp_t gfp) +static inline +struct page *__page_cache_alloc_order(gfp_t gfp, unsigned int order) { - return alloc_pages(gfp, 0); + if (order == 0) + return alloc_pages(gfp, 0); + return prep_transhuge_page(alloc_pages(thp_gfpmask(gfp), order)); } #endif +static inline struct page *__page_cache_alloc(gfp_t gfp) +{ + return __page_cache_alloc_order(gfp, 0); +} + static inline struct page *page_cache_alloc(struct address_space *x) { return __page_cache_alloc(mapping_gfp_mask(x)); diff --git a/mm/filemap.c b/mm/filemap.c index 23a051a7ef0f..9abba062973a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -941,24 +941,28 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, EXPORT_SYMBOL_GPL(add_to_page_cache_lru); #ifdef CONFIG_NUMA -struct page *__page_cache_alloc(gfp_t gfp) +struct page *__page_cache_alloc_order(gfp_t gfp, unsigned int order) { int n; struct page *page; + if (order > 0) + gfp = thp_gfpmask(gfp); + if (cpuset_do_page_mem_spread()) { unsigned int cpuset_mems_cookie; do { cpuset_mems_cookie = read_mems_allowed_begin(); n = cpuset_mem_spread_node(); - page = __alloc_pages_node(n, gfp, 0); + page = __alloc_pages_node(n, gfp, order); + prep_transhuge_page(page); } while (!page && 
read_mems_allowed_retry(cpuset_mems_cookie)); return page; } - return alloc_pages(gfp, 0); + return prep_transhuge_page(alloc_pages(gfp, order)); } -EXPORT_SYMBOL(__page_cache_alloc); +EXPORT_SYMBOL(__page_cache_alloc_order); #endif /*