@@ -428,7 +428,7 @@ int folio_mkclean(struct folio *);
int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
struct vm_area_struct *vma);
-void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked);
+void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked, bool unmap_clean);
int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
@@ -2373,7 +2373,7 @@ static void unmap_folio(struct folio *folio)
try_to_unmap(folio, ttu_flags | TTU_IGNORE_MLOCK);
}
-static void remap_page(struct folio *folio, unsigned long nr)
+static void remap_page(struct folio *folio, unsigned long nr, bool unmap_clean)
{
int i = 0;
@@ -2381,7 +2381,7 @@ static void remap_page(struct folio *folio, unsigned long nr)
if (!folio_test_anon(folio))
return;
for (;;) {
- remove_migration_ptes(folio, folio, true);
+ remove_migration_ptes(folio, folio, true, unmap_clean);
i += folio_nr_pages(folio);
if (i >= nr)
break;
@@ -2560,7 +2560,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
}
local_irq_enable();
- remap_page(folio, nr);
+ remap_page(folio, nr, PageAnon(head));
if (PageSwapCache(head)) {
swp_entry_t entry = { .val = page_private(head) };
@@ -2789,7 +2789,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
if (mapping)
xas_unlock(&xas);
local_irq_enable();
- remap_page(folio, folio_nr_pages(folio));
+ remap_page(folio, folio_nr_pages(folio), false);
ret = -EBUSY;
}
@@ -30,6 +30,7 @@
#include <linux/writeback.h>
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>
+#include <linux/vm_event_item.h>
#include <linux/security.h>
#include <linux/backing-dev.h>
#include <linux/compaction.h>
@@ -168,13 +169,62 @@ void putback_movable_pages(struct list_head *l)
}
}
+/*
+ * try_to_unmap_clean - drop the mapping of a zero-filled subpage instead of
+ * restoring its migration pte.
+ *
+ * Called while remapping a split THP (see remove_migration_pte). The pte is
+ * still a non-present migration entry. If the subpage contains only zeros,
+ * there is nothing worth remapping: clear the pte, or — when userfaultfd is
+ * armed on the vma — install a read-only shared zeropage pte so uffd does not
+ * observe a newly missing pte.
+ *
+ * Returns true if the subpage was unmapped here (caller must skip the normal
+ * remap), false if it must be remapped as usual (mlocked vma or non-zero
+ * contents).
+ */
+static bool try_to_unmap_clean(struct page_vma_mapped_walk *pvmw, struct page *page)
+{
+ void *addr;
+ bool dirty;
+ pte_t newpte;
+
+ /* Only base-page anon subpages, locked, with a non-present (migration) pte. */
+ VM_BUG_ON_PAGE(PageCompound(page), page);
+ VM_BUG_ON_PAGE(!PageAnon(page), page);
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
+
+ /* Mlocked pages must stay resident; leave them to the normal remap path. */
+ if (PageMlocked(page) || (pvmw->vma->vm_flags & VM_LOCKED))
+ return false;
+
+ /*
+ * The pmd entry mapping the old thp was flushed and the pte mapping
+ * this subpage has been non present. Therefore, this subpage is
+ * inaccessible. We don't need to remap it if it contains only zeros.
+ */
+ addr = kmap_local_page(page);
+ dirty = memchr_inv(addr, 0, PAGE_SIZE);
+ kunmap_local(addr);
+
+ if (dirty)
+ return false;
+
+ /*
+ * Clear the migration entry. No TLB flush is needed: the entry was
+ * already non-present, so no stale translation can exist.
+ */
+ pte_clear_not_present_full(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, false);
+
+ if (userfaultfd_armed(pvmw->vma)) {
+ /* Map the shared zeropage read-only so uffd never sees a pte vanish. */
+ newpte = pte_mkspecial(pfn_pte(page_to_pfn(ZERO_PAGE(pvmw->address)),
+ pvmw->vma->vm_page_prot));
+ /*
+ * NOTE(review): the pte was already cleared by
+ * pte_clear_not_present_full() above, so this ptep_clear_flush()
+ * clears a none pte and issues a TLB flush that looks redundant
+ * for an entry that was never present — confirm whether it can
+ * be dropped.
+ */
+ ptep_clear_flush(pvmw->vma, pvmw->address, pvmw->pte);
+ set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte);
+ /* The zeropage mapping does not count as an anon page. */
+ dec_mm_counter(pvmw->vma->vm_mm, MM_ANONPAGES);
+ count_vm_event(THP_SPLIT_REMAP_READONLY_ZERO_PAGE);
+ return true;
+ }
+
+ dec_mm_counter(pvmw->vma->vm_mm, mm_counter(page));
+ count_vm_event(THP_SPLIT_UNMAP);
+ return true;
+}
+
+/*
+ * Argument bundle passed through rmap_walk_control.arg to
+ * remove_migration_pte(): the source folio plus whether zero-filled
+ * subpages may be unmapped (try_to_unmap_clean) instead of remapped.
+ */
+struct rmap_walk_arg {
+ struct folio *folio;
+ bool unmap_clean;
+};
+
/*
* Restore a potential migration pte to a working pte entry
*/
static bool remove_migration_pte(struct folio *folio,
- struct vm_area_struct *vma, unsigned long addr, void *old)
+ struct vm_area_struct *vma, unsigned long addr, void *arg)
{
- DEFINE_FOLIO_VMA_WALK(pvmw, old, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
+ struct rmap_walk_arg *rmap_walk_arg = arg;
+ DEFINE_FOLIO_VMA_WALK(pvmw, rmap_walk_arg->folio, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
while (page_vma_mapped_walk(&pvmw)) {
rmap_t rmap_flags = RMAP_NONE;
@@ -197,6 +247,8 @@ static bool remove_migration_pte(struct folio *folio,
continue;
}
#endif
+ if (rmap_walk_arg->unmap_clean && try_to_unmap_clean(&pvmw, new))
+ continue;
folio_get(folio);
pte = mk_pte(new, READ_ONCE(vma->vm_page_prot));
@@ -272,13 +324,20 @@ static bool remove_migration_pte(struct folio *folio,
* Get rid of all migration entries and replace them by
* references to the indicated page.
*/
-void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked)
+void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked, bool unmap_clean)
{
+ struct rmap_walk_arg rmap_walk_arg = {
+ .folio = src,
+ .unmap_clean = unmap_clean,
+ };
+
struct rmap_walk_control rwc = {
.rmap_one = remove_migration_pte,
- .arg = src,
+ .arg = &rmap_walk_arg,
};
+ VM_BUG_ON_FOLIO(unmap_clean && src != dst, src);
+
if (locked)
rmap_walk_locked(dst, &rwc);
else
@@ -872,7 +931,7 @@ static int writeout(struct address_space *mapping, struct folio *folio)
* At this point we know that the migration attempt cannot
* be successful.
*/
- remove_migration_ptes(folio, folio, false);
+ remove_migration_ptes(folio, folio, false, false);
rc = mapping->a_ops->writepage(&folio->page, &wbc);
@@ -1128,7 +1187,7 @@ static int __unmap_and_move(struct folio *src, struct folio *dst,
if (page_was_mapped)
remove_migration_ptes(src,
- rc == MIGRATEPAGE_SUCCESS ? dst : src, false);
+ rc == MIGRATEPAGE_SUCCESS ? dst : src, false, false);
out_unlock_both:
folio_unlock(dst);
@@ -1338,7 +1397,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
if (page_was_mapped)
remove_migration_ptes(src,
- rc == MIGRATEPAGE_SUCCESS ? dst : src, false);
+ rc == MIGRATEPAGE_SUCCESS ? dst : src, false, false);
unlock_put_anon:
folio_unlock(dst);
@@ -421,7 +421,7 @@ static unsigned long migrate_device_unmap(unsigned long *src_pfns,
continue;
folio = page_folio(page);
- remove_migration_ptes(folio, folio, false);
+ remove_migration_ptes(folio, folio, false, false);
src_pfns[i] = 0;
folio_unlock(folio);
@@ -847,7 +847,7 @@ void migrate_device_finalize(unsigned long *src_pfns,
src = page_folio(page);
dst = page_folio(newpage);
- remove_migration_ptes(src, dst, false);
+ remove_migration_ptes(src, dst, false, false);
folio_unlock(src);
if (is_zone_device_page(page))