--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -33,6 +33,7 @@
#include <linux/oom.h>
#include <linux/numa.h>
#include <linux/page_owner.h>
+#include <linux/sched/sysctl.h>

#include <asm/tlb.h>
#include <asm/pgalloc.h>
@@ -1967,17 +1968,28 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
}
#endif

- /*
- * Avoid trapping faults against the zero page. The read-only
- * data is likely to be read-cached on the local CPU and
- * local/remote hits to the zero page are not interesting.
- */
- if (prot_numa && is_huge_zero_pmd(*pmd))
- goto unlock;
+ if (prot_numa) {
+ struct page *page;
+ /*
+ * Avoid trapping faults against the zero page. The read-only
+ * data is likely to be read-cached on the local CPU and
+ * local/remote hits to the zero page are not interesting.
+ */
+ if (is_huge_zero_pmd(*pmd))
+ goto unlock;

- if (prot_numa && pmd_protnone(*pmd))
- goto unlock;
+ if (pmd_protnone(*pmd))
+ goto unlock;
+ page = pmd_page(*pmd);
+ /*
+ * Skip if normal NUMA balancing is disabled and there is
+ * no faster memory node to promote to.
+ */
+ if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
+ next_promotion_node(page_to_nid(page)) == -1)
+ goto unlock;
+ }

/*
* In case prot_numa, we are under down_read(mmap_sem). It's critical
* to not clear pmd intermittently to avoid race with MADV_DONTNEED
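
The test added above depends on two things introduced elsewhere in this
series: the sysctl_numa_balancing_mode bitmask and the
next_promotion_node() helper. A minimal sketch of the assumed contract
follows; the mode bits mirror mainline <linux/sched/sysctl.h>, while
node_promotion[] and its population are purely illustrative, not this
series' actual implementation:

  /* NUMA balancing mode bits, as in mainline <linux/sched/sysctl.h> */
  #define NUMA_BALANCING_DISABLED		0x0
  #define NUMA_BALANCING_NORMAL		0x1
  #define NUMA_BALANCING_MEMORY_TIERING	0x2

  /*
   * Assumed contract: return the node id of a faster memory node that
   * pages on @nid could be promoted to, or -1 when @nid is already in
   * the fastest tier. Sketched as a hypothetical per-node table that
   * would be filled from the memory tiering topology at boot.
   */
  static int node_promotion[MAX_NUMNODES] __read_mostly = {
          [0 ... MAX_NUMNODES - 1] = -1,
  };

  static inline int next_promotion_node(int nid)
  {
          return node_promotion[nid];
  }

With that contract, change_huge_pmd() keeps trapping NUMA faults on a
huge page when either normal balancing is enabled or the page sits on
a node that still has somewhere faster to promote to.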
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -28,6 +28,7 @@
#include <linux/ksm.h>
#include <linux/uaccess.h>
#include <linux/mm_inline.h>
+#include <linux/sched/sysctl.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
@@ -79,6 +80,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
*/
if (prot_numa) {
struct page *page;
+ int nid;

/* Avoid TLB flush if possible */
if (pte_protnone(oldpte))
@@ -105,7 +107,17 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
* Don't mess with PTEs if page is already on the node
* a single-threaded process is running on.
*/
- if (target_node == page_to_nid(page))
+ nid = page_to_nid(page);
+ if (target_node == nid)
+ continue;
+
+ /*
+ * Skip scanning if normal NUMA balancing is disabled
+ * and there is no faster memory node to promote to.
+ */
+ if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
+ next_promotion_node(nid) == -1)
continue;
}
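
The same two-part test now appears in both change_huge_pmd() and
change_pte_range(). A possible follow-up would be hoisting it into a
common helper along these lines (numa_scan_worthwhile() is an
illustrative name, not something defined by this series):

  /*
   * True if NUMA hinting faults are still useful for a page on @nid:
   * either normal NUMA balancing wants them, or the page could be
   * promoted to a faster memory node.
   */
  static inline bool numa_scan_worthwhile(int nid)
  {
          if (sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL)
                  return true;
          return next_promotion_node(nid) != -1;
  }

Assuming the series keeps the mainline numa_balancing sysctl semantics,
the tiering-only path these hunks optimize for is selected with:

  # enable tiering-driven promotion only, without normal balancing
  echo 2 > /proc/sys/kernel/numa_balancing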