diff mbox series

[RFC,31/31] sysctl: toggle to promote PUD-mapped 1GB THP or not.

Message ID 20190215220856.29749-32-zi.yan@sent.com (mailing list archive)
State New, archived
Headers show
Series Generating physically contiguous memory after page allocation | expand

Commit Message

Zi Yan Feb. 15, 2019, 10:08 p.m. UTC
From: Zi Yan <ziy@nvidia.com>

Only promotion PMD THP by default.

Signed-off-by: Zi Yan <ziy@nvidia.com>
---
 kernel/sysctl.c | 11 +++++++++++
 mm/mem_defrag.c | 17 +++++++++++++----
 2 files changed, 24 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 762535a2c7d1..20263d2c39b9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -121,6 +121,7 @@  extern int vma_scan_threshold_type;
 extern int vma_no_repeat_defrag;
 extern int num_breakout_chunks;
 extern int defrag_size_threshold;
+extern int mem_defrag_promote_thp;
 
 extern int only_print_head_pfn;
 
@@ -135,6 +136,7 @@  static int zero;
 static int __maybe_unused one = 1;
 static int __maybe_unused two = 2;
 static int __maybe_unused four = 4;
+static int __maybe_unused fifteen = 15;
 static unsigned long one_ul = 1;
 static int one_hundred = 100;
 static int one_thousand = 1000;
@@ -1761,6 +1763,15 @@  static struct ctl_table vm_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+	{
+		.procname	= "mem_defrag_promote_thp",
+		.data		= &mem_defrag_promote_thp,
+		.maxlen		= sizeof(mem_defrag_promote_thp),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &fifteen,
+	},
 	{ }
 };
 
diff --git a/mm/mem_defrag.c b/mm/mem_defrag.c
index d7a579924d12..7cfa99351925 100644
--- a/mm/mem_defrag.c
+++ b/mm/mem_defrag.c
@@ -64,12 +64,18 @@  enum {
 	VMA_THRESHOLD_TYPE_SIZE,
 };
 
+#define PROMOTE_PMD_MAP  (0x8)
+#define PROMOTE_PMD_PAGE (0x4)
+#define PROMOTE_PUD_MAP  (0x2)
+#define PROMOTE_PUD_PAGE (0x1)
+
 int num_breakout_chunks;
 int vma_scan_percentile = 100;
 int vma_scan_threshold_type = VMA_THRESHOLD_TYPE_TIME;
 int vma_no_repeat_defrag;
 int kmem_defragd_always;
 int defrag_size_threshold = 5;
+int mem_defrag_promote_thp = (PROMOTE_PMD_MAP|PROMOTE_PMD_PAGE);
 static DEFINE_SPINLOCK(kmem_defragd_mm_lock);
 
 #define MM_SLOTS_HASH_BITS 10
@@ -1613,7 +1619,8 @@  static int kmem_defragd_scan_mm(struct defrag_scan_control *sc)
 						/* defrag works for the whole chunk,
 						 * promote to THP in place
 						 */
-						if (!defrag_result &&
+						if ((mem_defrag_promote_thp & PROMOTE_PMD_PAGE) &&
+							!defrag_result &&
 							/* skip existing THPs */
 							defrag_stats.aligned_max_order < HPAGE_PMD_ORDER &&
 							!(*scan_address & (HPAGE_PMD_SIZE-1)) &&
@@ -1628,7 +1635,8 @@  static int kmem_defragd_scan_mm(struct defrag_scan_control *sc)
 								 * still PTE pointed
 								 */
 								/* promote PTE-mapped THP to PMD-mapped */
-								promote_huge_pmd_address(vma, *scan_address);
+								if (mem_defrag_promote_thp & PROMOTE_PMD_MAP)
+									promote_huge_pmd_address(vma, *scan_address);
 							}
 							up_write(&mm->mmap_sem);
 						}
@@ -1654,7 +1662,8 @@  static int kmem_defragd_scan_mm(struct defrag_scan_control *sc)
 				}
 
 				/* defrag works for the whole chunk, promote to PUD THP in place */
-				if (!nr_fails_in_1gb_range &&
+				if ((mem_defrag_promote_thp & PROMOTE_PUD_PAGE) &&
+					!nr_fails_in_1gb_range &&
 					!skip_promotion && /* avoid existing THP */
 					!(defrag_begin & (HPAGE_PUD_SIZE-1)) &&
 					!(defrag_end & (HPAGE_PUD_SIZE-1))) {
@@ -1668,7 +1677,7 @@  static int kmem_defragd_scan_mm(struct defrag_scan_control *sc)
 						 * still PMD pointed
 						 */
 						/* promote PMD-mapped THP to PUD-mapped */
-						if (mem_defrag_promote_1gb_thp)
+						if (mem_defrag_promote_thp & PROMOTE_PUD_MAP)
 							promote_huge_pud_address(vma, defrag_begin);
 					}
 					up_write(&mm->mmap_sem);