diff mbox series

[RFC,v2,12/21] x86/pgtable: allocate page table pages from DRAM

Message ID 20181226133351.770245668@intel.com (mailing list archive)
State New, archived
Headers show
Series PMEM NUMA node and hotness accounting/migration | expand

Commit Message

Fengguang Wu Dec. 26, 2018, 1:14 p.m. UTC
On rand read/writes on large data, we find near half memory accesses
caused by TLB misses, hence hit the page table pages. So better keep
page table pages in faster DRAM nodes.

Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
---
 arch/x86/include/asm/pgalloc.h |   10 +++++++---
 arch/x86/mm/pgtable.c          |   22 ++++++++++++++++++----
 2 files changed, 25 insertions(+), 7 deletions(-)
diff mbox series

Patch

--- linux.orig/arch/x86/mm/pgtable.c	2018-12-26 19:41:57.494900885 +0800
+++ linux/arch/x86/mm/pgtable.c	2018-12-26 19:42:35.531621035 +0800
@@ -22,17 +22,30 @@  EXPORT_SYMBOL(physical_mask);
 #endif
 
 gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
+nodemask_t all_node_mask = NODE_MASK_ALL;
+
+unsigned long __get_free_pgtable_pages(gfp_t gfp_mask,
+						     unsigned int order)
+{
+	struct page *page;
+
+	page = __alloc_pages_nodemask(gfp_mask, order, numa_node_id(), &all_node_mask);
+	if (!page)
+		return 0;
+	return (unsigned long) page_address(page);
+}
+EXPORT_SYMBOL(__get_free_pgtable_pages);
 
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
+	return (pte_t *)__get_free_pgtable_pages(PGALLOC_GFP & ~__GFP_ACCOUNT, 0);
 }
 
 pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *pte;
 
-	pte = alloc_pages(__userpte_alloc_gfp, 0);
+	pte = __alloc_pages_nodemask(__userpte_alloc_gfp, 0, numa_node_id(), &all_node_mask);
 	if (!pte)
 		return NULL;
 	if (!pgtable_page_ctor(pte)) {
@@ -241,7 +254,7 @@  static int preallocate_pmds(struct mm_st
 		gfp &= ~__GFP_ACCOUNT;
 
 	for (i = 0; i < count; i++) {
-		pmd_t *pmd = (pmd_t *)__get_free_page(gfp);
+		pmd_t *pmd = (pmd_t *)__get_free_pgtable_pages(gfp, 0);
 		if (!pmd)
 			failed = true;
 		if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
@@ -422,7 +435,8 @@  static inline void _pgd_free(pgd_t *pgd)
 
 static inline pgd_t *_pgd_alloc(void)
 {
-	return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
+	return (pgd_t *)__get_free_pgtable_pages(PGALLOC_GFP,
+						 PGD_ALLOCATION_ORDER);
 }
 
 static inline void _pgd_free(pgd_t *pgd)
--- linux.orig/arch/x86/include/asm/pgalloc.h	2018-12-26 19:40:12.992251270 +0800
+++ linux/arch/x86/include/asm/pgalloc.h	2018-12-26 19:42:35.531621035 +0800
@@ -96,10 +96,11 @@  static inline pmd_t *pmd_alloc_one(struc
 {
 	struct page *page;
 	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO;
+	nodemask_t all_node_mask = NODE_MASK_ALL;
 
 	if (mm == &init_mm)
 		gfp &= ~__GFP_ACCOUNT;
-	page = alloc_pages(gfp, 0);
+	page = __alloc_pages_nodemask(gfp, 0, numa_node_id(), &all_node_mask);
 	if (!page)
 		return NULL;
 	if (!pgtable_pmd_page_ctor(page)) {
@@ -141,13 +142,16 @@  static inline void p4d_populate(struct m
 	set_p4d(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
 }
 
+extern unsigned long __get_free_pgtable_pages(gfp_t gfp_mask,
+					      unsigned int order);
+
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	gfp_t gfp = GFP_KERNEL_ACCOUNT;
 
 	if (mm == &init_mm)
 		gfp &= ~__GFP_ACCOUNT;
-	return (pud_t *)get_zeroed_page(gfp);
+	return (pud_t *)__get_free_pgtable_pages(gfp | __GFP_ZERO, 0);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
@@ -179,7 +183,7 @@  static inline p4d_t *p4d_alloc_one(struc
 
 	if (mm == &init_mm)
 		gfp &= ~__GFP_ACCOUNT;
-	return (p4d_t *)get_zeroed_page(gfp);
+	return (p4d_t *)__get_free_pgtable_pages(gfp | __GFP_ZERO, 0);
 }
 
 static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)