diff mbox series

[v3,17/25] mm: Add __page_cache_alloc_order

Message ID 20200429133657.22632-18-willy@infradead.org (mailing list archive)
State New, archived
Headers show
Series Large pages in the page cache | expand

Commit Message

Matthew Wilcox April 29, 2020, 1:36 p.m. UTC
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>

This new function allows page cache pages to be allocated that are
larger than an order-0 page.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 include/linux/pagemap.h | 24 +++++++++++++++++++++---
 mm/filemap.c            | 12 ++++++++----
 2 files changed, 29 insertions(+), 7 deletions(-)

Comments

Yang Shi May 6, 2020, 6:03 p.m. UTC | #1
On Wed, Apr 29, 2020 at 6:37 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
>
> This new function allows page cache pages to be allocated that are
> larger than an order-0 page.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> ---
>  include/linux/pagemap.h | 24 +++++++++++++++++++++---
>  mm/filemap.c            | 12 ++++++++----
>  2 files changed, 29 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index 55199cb5bd66..1169e2428dd7 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -205,15 +205,33 @@ static inline int page_cache_add_speculative(struct page *page, int count)
>         return __page_cache_add_speculative(page, count);
>  }
>
> +static inline gfp_t thp_gfpmask(gfp_t gfp)
> +{
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +       /* We'd rather allocate smaller pages than stall a page fault */
> +       gfp |= GFP_TRANSHUGE_LIGHT;

This does not look correct. GFP_TRANSHUGE_LIGHT may set GFP_FS, but some
filesystems may expect GFP_NOFS, e.g. in the readahead path.

> +       gfp &= ~__GFP_DIRECT_RECLAIM;
> +#endif
> +       return gfp;
> +}
> +
>  #ifdef CONFIG_NUMA
> -extern struct page *__page_cache_alloc(gfp_t gfp);
> +extern struct page *__page_cache_alloc_order(gfp_t gfp, unsigned int order);
>  #else
> -static inline struct page *__page_cache_alloc(gfp_t gfp)
> +static inline
> +struct page *__page_cache_alloc_order(gfp_t gfp, unsigned int order)
>  {
> -       return alloc_pages(gfp, 0);
> +       if (order == 0)
> +               return alloc_pages(gfp, 0);
> +       return prep_transhuge_page(alloc_pages(thp_gfpmask(gfp), order));
>  }
>  #endif
>
> +static inline struct page *__page_cache_alloc(gfp_t gfp)
> +{
> +       return __page_cache_alloc_order(gfp, 0);
> +}
> +
>  static inline struct page *page_cache_alloc(struct address_space *x)
>  {
>         return __page_cache_alloc(mapping_gfp_mask(x));
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 23a051a7ef0f..9abba062973a 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -941,24 +941,28 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
>  EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
>
>  #ifdef CONFIG_NUMA
> -struct page *__page_cache_alloc(gfp_t gfp)
> +struct page *__page_cache_alloc_order(gfp_t gfp, unsigned int order)
>  {
>         int n;
>         struct page *page;
>
> +       if (order > 0)
> +               gfp = thp_gfpmask(gfp);
> +
>         if (cpuset_do_page_mem_spread()) {
>                 unsigned int cpuset_mems_cookie;
>                 do {
>                         cpuset_mems_cookie = read_mems_allowed_begin();
>                         n = cpuset_mem_spread_node();
> -                       page = __alloc_pages_node(n, gfp, 0);
> +                       page = __alloc_pages_node(n, gfp, order);
> +                       prep_transhuge_page(page);
>                 } while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
>
>                 return page;
>         }
> -       return alloc_pages(gfp, 0);
> +       return prep_transhuge_page(alloc_pages(gfp, order));
>  }
> -EXPORT_SYMBOL(__page_cache_alloc);
> +EXPORT_SYMBOL(__page_cache_alloc_order);
>  #endif
>
>  /*
> --
> 2.26.2
>
>
Matthew Wilcox June 7, 2020, 3:08 a.m. UTC | #2
On Wed, May 06, 2020 at 11:03:06AM -0700, Yang Shi wrote:
> > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> > index 55199cb5bd66..1169e2428dd7 100644
> > --- a/include/linux/pagemap.h
> > +++ b/include/linux/pagemap.h
> > @@ -205,15 +205,33 @@ static inline int page_cache_add_speculative(struct page *page, int count)
> >         return __page_cache_add_speculative(page, count);
> >  }
> >
> > +static inline gfp_t thp_gfpmask(gfp_t gfp)
> > +{
> > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > +       /* We'd rather allocate smaller pages than stall a page fault */
> > +       gfp |= GFP_TRANSHUGE_LIGHT;
> 
> This does not look correct. GFP_TRANSHUGE_LIGHT may set GFP_FS, but some
> filesystems may expect GFP_NOFS, e.g. in the readahead path.

Apologies, I overlooked this mail.

In one of the prerequisite patches for this patch set (which is now merged
as f2c817bed58d9be2051fad1d18e167e173c0c227), we call memalloc_nofs_save()
in the readahead path.  That ensures all allocations will have GFP_NOFS
set by the time the page allocator sees them.

Thanks for checking on this.
Yang Shi June 9, 2020, 5:38 p.m. UTC | #3
On Sat, Jun 6, 2020 at 8:08 PM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Wed, May 06, 2020 at 11:03:06AM -0700, Yang Shi wrote:
> > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> > > index 55199cb5bd66..1169e2428dd7 100644
> > > --- a/include/linux/pagemap.h
> > > +++ b/include/linux/pagemap.h
> > > @@ -205,15 +205,33 @@ static inline int page_cache_add_speculative(struct page *page, int count)
> > >         return __page_cache_add_speculative(page, count);
> > >  }
> > >
> > > +static inline gfp_t thp_gfpmask(gfp_t gfp)
> > > +{
> > > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > > +       /* We'd rather allocate smaller pages than stall a page fault */
> > > +       gfp |= GFP_TRANSHUGE_LIGHT;
> >
> > This does not look correct. GFP_TRANSHUGE_LIGHT may set GFP_FS, but some
> > filesystems may expect GFP_NOFS, e.g. in the readahead path.
>
> Apologies, I overlooked this mail.
>
> In one of the prerequisite patches for this patch set (which is now merged
> as f2c817bed58d9be2051fad1d18e167e173c0c227), we call memalloc_nofs_save()
> in the readahead path.  That ensures all allocations will have GFP_NOFS
> set by the time the page allocator sees them.
>
> Thanks for checking on this.

Aha, yes, correct. I missed that. Thanks for finding that commit.
diff mbox series

Patch

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 55199cb5bd66..1169e2428dd7 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -205,15 +205,33 @@  static inline int page_cache_add_speculative(struct page *page, int count)
 	return __page_cache_add_speculative(page, count);
 }
 
+static inline gfp_t thp_gfpmask(gfp_t gfp)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	/* We'd rather allocate smaller pages than stall a page fault */
+	gfp |= GFP_TRANSHUGE_LIGHT;
+	gfp &= ~__GFP_DIRECT_RECLAIM;
+#endif
+	return gfp;
+}
+
 #ifdef CONFIG_NUMA
-extern struct page *__page_cache_alloc(gfp_t gfp);
+extern struct page *__page_cache_alloc_order(gfp_t gfp, unsigned int order);
 #else
-static inline struct page *__page_cache_alloc(gfp_t gfp)
+static inline
+struct page *__page_cache_alloc_order(gfp_t gfp, unsigned int order)
 {
-	return alloc_pages(gfp, 0);
+	if (order == 0)
+		return alloc_pages(gfp, 0);
+	return prep_transhuge_page(alloc_pages(thp_gfpmask(gfp), order));
 }
 #endif
 
+static inline struct page *__page_cache_alloc(gfp_t gfp)
+{
+	return __page_cache_alloc_order(gfp, 0);
+}
+
 static inline struct page *page_cache_alloc(struct address_space *x)
 {
 	return __page_cache_alloc(mapping_gfp_mask(x));
diff --git a/mm/filemap.c b/mm/filemap.c
index 23a051a7ef0f..9abba062973a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -941,24 +941,28 @@  int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
 
 #ifdef CONFIG_NUMA
-struct page *__page_cache_alloc(gfp_t gfp)
+struct page *__page_cache_alloc_order(gfp_t gfp, unsigned int order)
 {
 	int n;
 	struct page *page;
 
+	if (order > 0)
+		gfp = thp_gfpmask(gfp);
+
 	if (cpuset_do_page_mem_spread()) {
 		unsigned int cpuset_mems_cookie;
 		do {
 			cpuset_mems_cookie = read_mems_allowed_begin();
 			n = cpuset_mem_spread_node();
-			page = __alloc_pages_node(n, gfp, 0);
+			page = __alloc_pages_node(n, gfp, order);
+			prep_transhuge_page(page);
 		} while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
 
 		return page;
 	}
-	return alloc_pages(gfp, 0);
+	return prep_transhuge_page(alloc_pages(gfp, order));
 }
-EXPORT_SYMBOL(__page_cache_alloc);
+EXPORT_SYMBOL(__page_cache_alloc_order);
 #endif
 
 /*