diff mbox series

[RFC,v2,26/30] mm: thp: add a global knob to enable/disable PUD THPs.

Message ID 20200928175428.4110504-27-zi.yan@sent.com (mailing list archive)
State New, archived
Headers show
Series 1GB PUD THP support on x86_64 | expand

Commit Message

Zi Yan Sept. 28, 2020, 5:54 p.m. UTC
From: Zi Yan <ziy@nvidia.com>

Like the existing global PMD THP knob, it allows user to enable/disable
PUD THPs. PUD THP is disabled by default unless user knows the
performance tradeoff of using it, like longer first time page fault
due to larger page zeroing and longer page allocation time when memory
is fragmented. Experienced user can enable it and take advantage of its
benefit of suffering fewer page faults and TLB misses.

* always means PUD THPs will be allocated on all VMAs if possible.
* madvise means PUD THPs will be allocated if vm_flags has VM_HUGEPAGE_PUD
  set via madvise syscall using MADV_HUGEPAGE | MADV_HUGEPAGE_PUD.
* none means PUD THPs will not be allocated on any VMA.

Signed-off-by: Zi Yan <ziy@nvidia.com>
---
 include/linux/huge_mm.h | 14 ++++++++++++++
 mm/huge_memory.c        | 38 ++++++++++++++++++++++++++++++++++++++
 mm/memory.c             |  2 +-
 3 files changed, 53 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index c7bc40c4a5e2..0d0f9cf25aeb 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -119,6 +119,8 @@  enum transparent_hugepage_flag {
 #ifdef CONFIG_DEBUG_VM
 	TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG,
 #endif
+	TRANSPARENT_PUD_HUGEPAGE_FLAG,
+	TRANSPARENT_PUD_HUGEPAGE_REQ_MADV_FLAG,
 };
 
 struct kobject;
@@ -184,6 +186,18 @@  static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
 }
 
 bool transparent_hugepage_enabled(struct vm_area_struct *vma);
+static inline bool transparent_pud_hugepage_enabled(struct vm_area_struct *vma)
+{
+	if (transparent_hugepage_enabled(vma)) {
+		if (transparent_hugepage_flags & (1 << TRANSPARENT_PUD_HUGEPAGE_FLAG))
+			return true;
+		if (transparent_hugepage_flags &
+					(1 << TRANSPARENT_PUD_HUGEPAGE_REQ_MADV_FLAG))
+			return !!(vma->vm_flags & VM_HUGEPAGE_PUD);
+	}
+
+	return false;
+}
 
 #define HPAGE_CACHE_INDEX_MASK (HPAGE_PMD_NR - 1)
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 61ae7a0ded84..1965753b31a2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -199,6 +199,43 @@  static ssize_t enabled_store(struct kobject *kobj,
 static struct kobj_attribute enabled_attr =
 	__ATTR(enabled, 0644, enabled_show, enabled_store);
 
+static ssize_t enabled_pud_thp_show(struct kobject *kobj,
+			    struct kobj_attribute *attr, char *buf)
+{
+	if (test_bit(TRANSPARENT_PUD_HUGEPAGE_FLAG, &transparent_hugepage_flags))
+		return sprintf(buf, "[always] madvise never\n");
+	else if (test_bit(TRANSPARENT_PUD_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags))
+		return sprintf(buf, "always [madvise] never\n");
+	else
+		return sprintf(buf, "always madvise [never]\n");
+}
+
+static ssize_t enabled_pud_thp_store(struct kobject *kobj,
+			     struct kobj_attribute *attr,
+			     const char *buf, size_t count)
+{
+	ssize_t ret = count;
+
+	if (!memcmp("always", buf,
+		    min(sizeof("always")-1, count))) {
+		clear_bit(TRANSPARENT_PUD_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags);
+		set_bit(TRANSPARENT_PUD_HUGEPAGE_FLAG, &transparent_hugepage_flags);
+	} else if (!memcmp("madvise", buf,
+			   min(sizeof("madvise")-1, count))) {
+		clear_bit(TRANSPARENT_PUD_HUGEPAGE_FLAG, &transparent_hugepage_flags);
+		set_bit(TRANSPARENT_PUD_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags);
+	} else if (!memcmp("never", buf,
+			   min(sizeof("never")-1, count))) {
+		clear_bit(TRANSPARENT_PUD_HUGEPAGE_FLAG, &transparent_hugepage_flags);
+		clear_bit(TRANSPARENT_PUD_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags);
+	} else
+		ret = -EINVAL;
+
+	return ret;
+}
+static struct kobj_attribute enabled_pud_thp_attr =
+	__ATTR(enabled_pud_thp, 0644, enabled_pud_thp_show, enabled_pud_thp_store);
+
 ssize_t single_hugepage_flag_show(struct kobject *kobj,
 				struct kobj_attribute *attr, char *buf,
 				enum transparent_hugepage_flag flag)
@@ -305,6 +342,7 @@  static struct kobj_attribute hpage_pmd_size_attr =
 
 static struct attribute *hugepage_attr[] = {
 	&enabled_attr.attr,
+	&enabled_pud_thp_attr.attr,
 	&defrag_attr.attr,
 	&use_zero_page_attr.attr,
 	&hpage_pmd_size_attr.attr,
diff --git a/mm/memory.c b/mm/memory.c
index ab80d13807aa..9f7b509a3aa7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4282,7 +4282,7 @@  static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
 	if (!vmf.pud)
 		return VM_FAULT_OOM;
 retry_pud:
-	if (pud_none(*vmf.pud) && __transparent_hugepage_enabled(vma)) {
+	if (pud_none(*vmf.pud) && transparent_pud_hugepage_enabled(vma)) {
 		ret = create_huge_pud(&vmf);
 		if (!(ret & VM_FAULT_FALLBACK))
 			return ret;