[0/3] Large pages in the page cache

Message ID: 20190905182348.5319-1-willy@infradead.org

Message

Matthew Wilcox Sept. 5, 2019, 6:23 p.m. UTC
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>

Michal Hocko's reaction to Bill's implementation of filemap_huge_fault
was "convoluted so much I cannot wrap my head around it".  This spurred
me to finish up something I'd been working on in the background,
prompted by Kirill's desire to be able to allocate large page cache
pages in paths other than the fault handler.

This is in no sense complete, as nothing in this patch series actually
uses FGP_PMD yet.  It should, however, remove a lot of the complexity
from a future filemap_huge_fault() implementation and make it possible
to allocate larger pages in the read/write paths in the future.
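
For context, a future caller might look something like the sketch
below.  Only the FGP_PMD flag comes from this series; the helper name,
the index rounding and the exact flag combination are assumptions on
my part:

	/*
	 * Hypothetical caller, for illustration only.  Ask the page
	 * cache for a PMD-sized page covering @index, creating and
	 * locking it if necessary.  pagecache_get_page() would still
	 * be free to fall back to an order-0 page if the large
	 * allocation fails.
	 */
	static struct page *grab_pmd_page(struct address_space *mapping,
					  pgoff_t index)
	{
		return pagecache_get_page(mapping,
				round_down(index, HPAGE_PMD_NR),
				FGP_PMD | FGP_CREAT | FGP_LOCK,
				readahead_gfp_mask(mapping));
	}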

Matthew Wilcox (Oracle) (3):
  mm: Add __page_cache_alloc_order
  mm: Allow large pages to be added to the page cache
  mm: Allow find_get_page to be used for large pages

 include/linux/pagemap.h |  23 ++++++-
 mm/filemap.c            | 132 +++++++++++++++++++++++++++++++++-------
 2 files changed, 130 insertions(+), 25 deletions(-)

Comments

Matthew Wilcox Sept. 6, 2019, 3:59 p.m. UTC | #1
Bill pointed out I'd forgotten to call prep_transhuge_page().  I'll
fold this into some of the other commits, but this is what I'm thinking
of doing in case anyone has a better idea:

Basically, I prefer being able to do this:

-	return alloc_pages(gfp, order);
+	return prep_transhuge_page(alloc_pages(gfp, order));

to this:

+	struct page *page;
-	return alloc_pages(gfp, order);
+	page = alloc_pages(gfp, order);
+	if (page && (gfp & __GFP_COMP))
+		prep_transhuge_page(page);
+	return page;
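
The chained form only works if prep_transhuge_page() is safe to call on
whatever alloc_pages() returns, so the diff below makes it return its
argument unchanged for NULL and order-0 pages instead of making each
caller check __GFP_COMP first.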

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 45ede62aa85b..159e63438806 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -153,7 +153,7 @@ extern unsigned long thp_get_unmapped_area(struct file *filp,
 		unsigned long addr, unsigned long len, unsigned long pgoff,
 		unsigned long flags);
 
-extern void prep_transhuge_page(struct page *page);
+extern struct page *prep_transhuge_page(struct page *page);
 extern void free_transhuge_page(struct page *page);
 
 bool can_split_huge_page(struct page *page, int *pextra_pins);
@@ -294,7 +294,10 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
 	return false;
 }
 
-static inline void prep_transhuge_page(struct page *page) {}
+static inline struct page *prep_transhuge_page(struct page *page)
+{
+	return page;
+}
 
 #define transparent_hugepage_flags 0UL
 
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 72101811524c..8b9d672d868c 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -215,7 +215,7 @@ struct page *__page_cache_alloc_order(gfp_t gfp, unsigned int order)
 {
 	if (order > 0)
 		gfp |= __GFP_COMP;
-	return alloc_pages(gfp, order);
+	return prep_transhuge_page(alloc_pages(gfp, order));
 }
 #endif
 
diff --git a/mm/filemap.c b/mm/filemap.c
index a7fa3a50f750..c2b11799b968 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -986,11 +986,12 @@ struct page *__page_cache_alloc_order(gfp_t gfp, unsigned int order)
 			cpuset_mems_cookie = read_mems_allowed_begin();
 			n = cpuset_mem_spread_node();
 			page = __alloc_pages_node(n, gfp, order);
+			prep_transhuge_page(page);
 		} while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
 
 		return page;
 	}
-	return alloc_pages(gfp, order);
+	return prep_transhuge_page(alloc_pages(gfp, order));
 }
 EXPORT_SYMBOL(__page_cache_alloc_order);
 #endif
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 483b07b2d6ae..3961af907dd7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -502,15 +502,20 @@ static inline struct list_head *page_deferred_list(struct page *page)
 	return &page[2].deferred_list;
 }
 
-void prep_transhuge_page(struct page *page)
+struct page *prep_transhuge_page(struct page *page)
 {
+	if (!page || compound_order(page) == 0)
+		return page;
 	/*
-	 * we use page->mapping and page->indexlru in second tail page
+	 * we use page->mapping and page->index in second tail page
 	 * as list_head: assuming THP order >= 2
 	 */
+	BUG_ON(compound_order(page) == 1);
 
 	INIT_LIST_HEAD(page_deferred_list(page));
 	set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
+
+	return page;
 }
 
 static unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long len,
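
With prep_transhuge_page() returning its argument, a read-path user
could eventually combine the pieces roughly as follows.  This is only
a sketch: __page_cache_alloc_order() is from this series, but the
function name and the error handling around add_to_page_cache_lru()
are mine:

	/*
	 * Illustrative only: allocate a THP-sized page and insert it
	 * into the page cache at the PMD-aligned index, freeing it
	 * again if the insertion fails.
	 */
	static struct page *page_cache_alloc_huge(struct address_space *mapping,
						  pgoff_t index, gfp_t gfp)
	{
		struct page *page;

		page = __page_cache_alloc_order(gfp, HPAGE_PMD_ORDER);
		if (!page)
			return NULL;
		if (add_to_page_cache_lru(page, mapping,
				round_down(index, HPAGE_PMD_NR), gfp)) {
			put_page(page);
			return NULL;
		}
		return page;
	}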