@@ -98,6 +98,14 @@ struct collapse_control {
int node);
};
+/* Gather information from one khugepaged_scan_[pmd|file]() request */
+struct collapse_result {
+ enum scan_result result; /* outcome of the request (a SCAN_* status) */
+
+ /* Set to true whenever mmap_lock was released while handling the request */
+ bool dropped_mmap_lock;
+};
+
/**
* struct mm_slot - hash lookup from mm to mm_slot
* @hash: hash collision list
@@ -742,13 +750,13 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
result = SCAN_SUCCEED;
trace_mm_collapse_huge_page_isolate(page, none_or_zero,
referenced, writable, result);
- return 1;
+ return SCAN_SUCCEED;
}
out:
release_pte_pages(pte, _pte, compound_pagelist);
trace_mm_collapse_huge_page_isolate(page, none_or_zero,
referenced, writable, result);
- return 0;
+ return result;
}
static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
@@ -1086,7 +1094,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
struct collapse_control *cc, int referenced,
- int unmapped)
+ int unmapped, struct collapse_result *cr)
{
LIST_HEAD(compound_pagelist);
pmd_t *pmd, _pmd;
@@ -1094,7 +1102,6 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
pgtable_t pgtable;
struct page *new_page;
spinlock_t *pmd_ptl, *pte_ptl;
- int isolated = 0, result = 0;
struct vm_area_struct *vma;
struct mmu_notifier_range range;
gfp_t gfp;
@@ -1102,6 +1109,7 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
int node;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ cr->result = SCAN_FAIL;
/* Only allocate from the target node */
gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE;
@@ -1113,6 +1121,7 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
* that. We will recheck the vma after taking it again in write mode.
*/
mmap_read_unlock(mm);
+ cr->dropped_mmap_lock = true;
node = khugepaged_find_target_node(cc);
/* sched to specified node before huage page memory copy */
@@ -1123,26 +1132,26 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
}
new_page = cc->alloc_hpage(cc, gfp, node);
if (!new_page) {
- result = SCAN_ALLOC_HUGE_PAGE_FAIL;
+ cr->result = SCAN_ALLOC_HUGE_PAGE_FAIL;
goto out_nolock;
}
if (unlikely(mem_cgroup_charge(page_folio(new_page), mm, gfp))) {
- result = SCAN_CGROUP_CHARGE_FAIL;
+ cr->result = SCAN_CGROUP_CHARGE_FAIL;
goto out_nolock;
}
count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC);
mmap_read_lock(mm);
- result = hugepage_vma_revalidate(mm, address, &vma);
- if (result) {
+ cr->result = hugepage_vma_revalidate(mm, address, &vma);
+ if (cr->result) {
mmap_read_unlock(mm);
goto out_nolock;
}
pmd = mm_find_pmd(mm, address);
if (!pmd) {
- result = SCAN_PMD_NULL;
+ cr->result = SCAN_PMD_NULL;
mmap_read_unlock(mm);
goto out_nolock;
}
@@ -1165,8 +1174,8 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
* handled by the anon_vma lock + PG_lock.
*/
mmap_write_lock(mm);
- result = hugepage_vma_revalidate(mm, address, &vma);
- if (result)
+ cr->result = hugepage_vma_revalidate(mm, address, &vma);
+ if (cr->result)
goto out_up_write;
/* check if the pmd is still valid */
if (mm_find_pmd(mm, address) != pmd)
@@ -1193,11 +1202,11 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
mmu_notifier_invalidate_range_end(&range);
spin_lock(pte_ptl);
- isolated = __collapse_huge_page_isolate(vma, address, pte,
- &compound_pagelist);
+ cr->result = __collapse_huge_page_isolate(vma, address, pte,
+ &compound_pagelist);
spin_unlock(pte_ptl);
- if (unlikely(!isolated)) {
+ if (unlikely(cr->result != SCAN_SUCCEED)) {
pte_unmap(pte);
spin_lock(pmd_ptl);
BUG_ON(!pmd_none(*pmd));
@@ -1209,7 +1218,7 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
pmd_populate(mm, pmd, pmd_pgtable(_pmd));
spin_unlock(pmd_ptl);
anon_vma_unlock_write(vma->anon_vma);
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
goto out_up_write;
}
@@ -1245,25 +1254,25 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
cc->hpage = NULL;
- khugepaged_pages_collapsed++;
- result = SCAN_SUCCEED;
+ cr->result = SCAN_SUCCEED;
out_up_write:
mmap_write_unlock(mm);
out_nolock:
if (!IS_ERR_OR_NULL(cc->hpage))
mem_cgroup_uncharge(page_folio(cc->hpage));
- trace_mm_collapse_huge_page(mm, isolated, result);
+ trace_mm_collapse_huge_page(mm, cr->result == SCAN_SUCCEED, cr->result);
return;
}
-static int khugepaged_scan_pmd(struct mm_struct *mm,
- struct vm_area_struct *vma,
- unsigned long address,
- struct collapse_control *cc)
+static void khugepaged_scan_pmd(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long address,
+ struct collapse_control *cc,
+ struct collapse_result *cr)
{
pmd_t *pmd;
pte_t *pte, *_pte;
- int ret = 0, result = 0, referenced = 0;
+ int referenced = 0;
int none_or_zero = 0, shared = 0;
struct page *page = NULL;
unsigned long _address;
@@ -1272,9 +1281,10 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
bool writable = false;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ cr->result = SCAN_FAIL;
- result = find_pmd_or_thp_or_none(mm, address, &pmd);
- if (result != SCAN_SUCCEED)
+ cr->result = find_pmd_or_thp_or_none(mm, address, &pmd);
+ if (cr->result != SCAN_SUCCEED)
goto out;
memset(cc->node_load, 0, sizeof(cc->node_load));
@@ -1290,12 +1300,12 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
* comment below for pte_uffd_wp().
*/
if (pte_swp_uffd_wp(pteval)) {
- result = SCAN_PTE_UFFD_WP;
+ cr->result = SCAN_PTE_UFFD_WP;
goto out_unmap;
}
continue;
} else {
- result = SCAN_EXCEED_SWAP_PTE;
+ cr->result = SCAN_EXCEED_SWAP_PTE;
count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
goto out_unmap;
}
@@ -1305,7 +1315,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
++none_or_zero <= khugepaged_max_ptes_none) {
continue;
} else {
- result = SCAN_EXCEED_NONE_PTE;
+ cr->result = SCAN_EXCEED_NONE_PTE;
count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
goto out_unmap;
}
@@ -1320,7 +1330,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
* userfault messages that falls outside of
* the registered range. So, just be simple.
*/
- result = SCAN_PTE_UFFD_WP;
+ cr->result = SCAN_PTE_UFFD_WP;
goto out_unmap;
}
if (pte_write(pteval))
@@ -1328,13 +1338,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
page = vm_normal_page(vma, _address, pteval);
if (unlikely(!page)) {
- result = SCAN_PAGE_NULL;
+ cr->result = SCAN_PAGE_NULL;
goto out_unmap;
}
if (page_mapcount(page) > 1 &&
++shared > khugepaged_max_ptes_shared) {
- result = SCAN_EXCEED_SHARED_PTE;
+ cr->result = SCAN_EXCEED_SHARED_PTE;
count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
goto out_unmap;
}
@@ -1349,20 +1359,20 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
*/
node = page_to_nid(page);
if (khugepaged_scan_abort(node, cc)) {
- result = SCAN_SCAN_ABORT;
+ cr->result = SCAN_SCAN_ABORT;
goto out_unmap;
}
cc->node_load[node]++;
if (!PageLRU(page)) {
- result = SCAN_PAGE_LRU;
+ cr->result = SCAN_PAGE_LRU;
goto out_unmap;
}
if (PageLocked(page)) {
- result = SCAN_PAGE_LOCK;
+ cr->result = SCAN_PAGE_LOCK;
goto out_unmap;
}
if (!PageAnon(page)) {
- result = SCAN_PAGE_ANON;
+ cr->result = SCAN_PAGE_ANON;
goto out_unmap;
}
@@ -1384,7 +1394,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
* will be done again later the risk seems low.
*/
if (!is_refcount_suitable(page)) {
- result = SCAN_PAGE_COUNT;
+ cr->result = SCAN_PAGE_COUNT;
goto out_unmap;
}
if (pte_young(pteval) ||
@@ -1393,23 +1403,20 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
referenced++;
}
if (!writable) {
- result = SCAN_PAGE_RO;
+ cr->result = SCAN_PAGE_RO;
} else if (!referenced || (unmapped && referenced < HPAGE_PMD_NR/2)) {
- result = SCAN_LACK_REFERENCED_PAGE;
+ cr->result = SCAN_LACK_REFERENCED_PAGE;
} else {
- result = SCAN_SUCCEED;
- ret = 1;
+ cr->result = SCAN_SUCCEED;
}
out_unmap:
pte_unmap_unlock(pte, ptl);
- if (ret) {
+ if (cr->result == SCAN_SUCCEED)
/* collapse_huge_page will return with the mmap_lock released */
- collapse_huge_page(mm, address, cc, referenced, unmapped);
- }
+ collapse_huge_page(mm, address, cc, referenced, unmapped, cr);
out:
trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
- none_or_zero, result, unmapped);
- return ret;
+ none_or_zero, cr->result, unmapped);
}
static void collect_mm_slot(struct mm_slot *mm_slot)
@@ -1670,6 +1677,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
* @file: file that collapse on
* @start: collapse start address
* @cc: collapse context and scratchpad
+ * @cr: aggregate result information of collapse
*
* Basic scheme is simple, details are more complex:
* - allocate and lock a new huge page;
@@ -1688,7 +1696,9 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
*/
static void collapse_file(struct mm_struct *mm,
struct file *file, pgoff_t start,
- struct collapse_control *cc)
+ struct collapse_control *cc,
+ struct collapse_result *cr)
+
{
struct address_space *mapping = file->f_mapping;
gfp_t gfp;
@@ -1696,25 +1706,27 @@ static void collapse_file(struct mm_struct *mm,
pgoff_t index, end = start + HPAGE_PMD_NR;
LIST_HEAD(pagelist);
XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
- int nr_none = 0, result = SCAN_SUCCEED;
+ int nr_none = 0;
bool is_shmem = shmem_file(file);
int nr, node;
VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
+ cr->result = SCAN_SUCCEED;
+
/* Only allocate from the target node */
gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE;
node = khugepaged_find_target_node(cc);
new_page = cc->alloc_hpage(cc, gfp, node);
if (!new_page) {
- result = SCAN_ALLOC_HUGE_PAGE_FAIL;
+ cr->result = SCAN_ALLOC_HUGE_PAGE_FAIL;
goto out;
}
if (unlikely(mem_cgroup_charge(page_folio(new_page), mm, gfp))) {
- result = SCAN_CGROUP_CHARGE_FAIL;
+ cr->result = SCAN_CGROUP_CHARGE_FAIL;
goto out;
}
count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC);
@@ -1730,7 +1742,7 @@ static void collapse_file(struct mm_struct *mm,
break;
xas_unlock_irq(&xas);
if (!xas_nomem(&xas, GFP_KERNEL)) {
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
goto out;
}
} while (1);
@@ -1761,13 +1773,13 @@ static void collapse_file(struct mm_struct *mm,
*/
if (index == start) {
if (!xas_next_entry(&xas, end - 1)) {
- result = SCAN_TRUNCATED;
+ cr->result = SCAN_TRUNCATED;
goto xa_locked;
}
xas_set(&xas, index);
}
if (!shmem_charge(mapping->host, 1)) {
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
goto xa_locked;
}
xas_store(&xas, new_page);
@@ -1780,14 +1792,14 @@ static void collapse_file(struct mm_struct *mm,
/* swap in or instantiate fallocated page */
if (shmem_getpage(mapping->host, index, &page,
SGP_NOALLOC)) {
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
goto xa_unlocked;
}
} else if (trylock_page(page)) {
get_page(page);
xas_unlock_irq(&xas);
} else {
- result = SCAN_PAGE_LOCK;
+ cr->result = SCAN_PAGE_LOCK;
goto xa_locked;
}
} else { /* !is_shmem */
@@ -1800,7 +1812,7 @@ static void collapse_file(struct mm_struct *mm,
lru_add_drain();
page = find_lock_page(mapping, index);
if (unlikely(page == NULL)) {
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
goto xa_unlocked;
}
} else if (PageDirty(page)) {
@@ -1819,17 +1831,17 @@ static void collapse_file(struct mm_struct *mm,
*/
xas_unlock_irq(&xas);
filemap_flush(mapping);
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
goto xa_unlocked;
} else if (PageWriteback(page)) {
xas_unlock_irq(&xas);
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
goto xa_unlocked;
} else if (trylock_page(page)) {
get_page(page);
xas_unlock_irq(&xas);
} else {
- result = SCAN_PAGE_LOCK;
+ cr->result = SCAN_PAGE_LOCK;
goto xa_locked;
}
}
@@ -1842,7 +1854,7 @@ static void collapse_file(struct mm_struct *mm,
/* make sure the page is up to date */
if (unlikely(!PageUptodate(page))) {
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
goto out_unlock;
}
@@ -1851,12 +1863,12 @@ static void collapse_file(struct mm_struct *mm,
* we locked the first page, then a THP might be there already.
*/
if (PageTransCompound(page)) {
- result = SCAN_PAGE_COMPOUND;
+ cr->result = SCAN_PAGE_COMPOUND;
goto out_unlock;
}
if (page_mapping(page) != mapping) {
- result = SCAN_TRUNCATED;
+ cr->result = SCAN_TRUNCATED;
goto out_unlock;
}
@@ -1867,18 +1879,18 @@ static void collapse_file(struct mm_struct *mm,
* page is dirty because it hasn't been flushed
* since first write.
*/
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
goto out_unlock;
}
if (isolate_lru_page(page)) {
- result = SCAN_DEL_PAGE_LRU;
+ cr->result = SCAN_DEL_PAGE_LRU;
goto out_unlock;
}
if (page_has_private(page) &&
!try_to_release_page(page, GFP_KERNEL)) {
- result = SCAN_PAGE_HAS_PRIVATE;
+ cr->result = SCAN_PAGE_HAS_PRIVATE;
putback_lru_page(page);
goto out_unlock;
}
@@ -1899,7 +1911,7 @@ static void collapse_file(struct mm_struct *mm,
* - one from isolate_lru_page;
*/
if (!page_ref_freeze(page, 3)) {
- result = SCAN_PAGE_COUNT;
+ cr->result = SCAN_PAGE_COUNT;
xas_unlock_irq(&xas);
putback_lru_page(page);
goto out_unlock;
@@ -1934,7 +1946,7 @@ static void collapse_file(struct mm_struct *mm,
*/
smp_mb();
if (inode_is_open_for_write(mapping->host)) {
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
__mod_lruvec_page_state(new_page, NR_FILE_THPS, -nr);
filemap_nr_thps_dec(mapping);
goto xa_locked;
@@ -1961,7 +1973,7 @@ static void collapse_file(struct mm_struct *mm,
*/
try_to_unmap_flush();
- if (result == SCAN_SUCCEED) {
+ if (cr->result == SCAN_SUCCEED) {
struct page *page, *tmp;
/*
@@ -2001,8 +2013,6 @@ static void collapse_file(struct mm_struct *mm,
*/
retract_page_tables(mapping, start);
cc->hpage = NULL;
-
- khugepaged_pages_collapsed++;
} else {
struct page *page;
@@ -2054,15 +2064,16 @@ static void collapse_file(struct mm_struct *mm,
static void khugepaged_scan_file(struct mm_struct *mm,
struct file *file, pgoff_t start,
- struct collapse_control *cc)
+ struct collapse_control *cc,
+ struct collapse_result *cr)
{
struct page *page = NULL;
struct address_space *mapping = file->f_mapping;
XA_STATE(xas, &mapping->i_pages, start);
int present, swap;
int node = NUMA_NO_NODE;
- int result = SCAN_SUCCEED;
+ cr->result = SCAN_SUCCEED;
present = 0;
swap = 0;
memset(cc->node_load, 0, sizeof(cc->node_load));
@@ -2073,7 +2084,7 @@ static void khugepaged_scan_file(struct mm_struct *mm,
if (xa_is_value(page)) {
if (++swap > khugepaged_max_ptes_swap) {
- result = SCAN_EXCEED_SWAP_PTE;
+ cr->result = SCAN_EXCEED_SWAP_PTE;
count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
break;
}
@@ -2085,25 +2096,25 @@ static void khugepaged_scan_file(struct mm_struct *mm,
* into a PMD sized page
*/
if (PageTransCompound(page)) {
- result = SCAN_PAGE_COMPOUND;
+ cr->result = SCAN_PAGE_COMPOUND;
break;
}
node = page_to_nid(page);
if (khugepaged_scan_abort(node, cc)) {
- result = SCAN_SCAN_ABORT;
+ cr->result = SCAN_SCAN_ABORT;
break;
}
cc->node_load[node]++;
if (!PageLRU(page)) {
- result = SCAN_PAGE_LRU;
+ cr->result = SCAN_PAGE_LRU;
break;
}
if (page_count(page) !=
1 + page_mapcount(page) + page_has_private(page)) {
- result = SCAN_PAGE_COUNT;
+ cr->result = SCAN_PAGE_COUNT;
break;
}
@@ -2122,12 +2133,12 @@ static void khugepaged_scan_file(struct mm_struct *mm,
}
rcu_read_unlock();
- if (result == SCAN_SUCCEED) {
+ if (cr->result == SCAN_SUCCEED) {
if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
- result = SCAN_EXCEED_NONE_PTE;
+ cr->result = SCAN_EXCEED_NONE_PTE;
count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
} else {
- collapse_file(mm, file, start, cc);
+ collapse_file(mm, file, start, cc, cr);
}
}
@@ -2136,7 +2147,8 @@ static void khugepaged_scan_file(struct mm_struct *mm,
#else
static void khugepaged_scan_file(struct mm_struct *mm,
struct file *file, pgoff_t start,
- struct collapse_control *cc)
+ struct collapse_control *cc,
+ struct collapse_result *cr)
{
BUILD_BUG();
}
@@ -2208,7 +2220,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
goto skip;
while (khugepaged_scan.address < hend) {
- int ret;
+ struct collapse_result cr = {0};
cond_resched();
if (unlikely(khugepaged_test_exit(mm)))
goto breakouterloop;
@@ -2222,17 +2234,20 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
khugepaged_scan.address);
mmap_read_unlock(mm);
- ret = 1;
- khugepaged_scan_file(mm, file, pgoff, cc);
+ cr.dropped_mmap_lock = true;
+ khugepaged_scan_file(mm, file, pgoff, cc, &cr);
fput(file);
} else {
- ret = khugepaged_scan_pmd(mm, vma,
- khugepaged_scan.address, cc);
+ khugepaged_scan_pmd(mm, vma,
+ khugepaged_scan.address,
+ cc, &cr);
}
+ if (cr.result == SCAN_SUCCEED)
+ ++khugepaged_pages_collapsed;
/* move to next address */
khugepaged_scan.address += HPAGE_PMD_SIZE;
progress += HPAGE_PMD_NR;
- if (ret)
+ if (cr.dropped_mmap_lock)
/* we released mmap_lock so break loop */
goto breakouterloop_mmap_lock;
if (progress >= pages)
Add struct collapse_result which aggregates data from a single khugepaged_scan_pmd() or khugepaged_scan_file() request. Change khugepaged to take action based on this returned data instead of deep within the collapsing functions themselves. Signed-off-by: Zach O'Keefe <zokeefe@google.com> --- mm/khugepaged.c | 187 ++++++++++++++++++++++++++---------------------- 1 file changed, 101 insertions(+), 86 deletions(-)