[v2,1/1] mm/vmscan: avoid split PMD-mapped THP during shrink_folio_list()

Message ID	20240422055213.60231-1-ioworker0@gmail.com (mailing list archive)
State	New
Headers	show Return-Path: <owner-linux-mm@kvack.org> From: Lance Yang <ioworker0@gmail.com> To: akpm@linux-foundation.org Cc: willy@infradead.org, maskray@google.com, ziy@nvidia.com, ryan.roberts@arm.com, david@redhat.com, 21cnbao@gmail.com, mhocko@suse.com, fengwei.yin@intel.com, zokeefe@google.com, shy828301@gmail.com, xiehuan09@gmail.com, wangkefeng.wang@huawei.com, songmuchun@bytedance.com, peterx@redhat.com, minchan@kernel.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org, Lance Yang <ioworker0@gmail.com> Subject: [PATCH v2 1/1] mm/vmscan: avoid split PMD-mapped THP during shrink_folio_list() Date: Mon, 22 Apr 2024 13:52:13 +0800 Message-Id: <20240422055213.60231-1-ioworker0@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Sender: owner-linux-mm@kvack.org Precedence: bulk
Series	[v2,1/1] mm/vmscan: avoid split PMD-mapped THP during shrink_folio_list() \| expand [v2,1/1] mm/vmscan: avoid split PMD-mapped THP during shrink_folio_list()

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7cd07b83a3d0..56c7ea73090b 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -36,6 +36,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		    pmd_t *pmd, unsigned long addr, pgprot_t newprot,
 		    unsigned long cp_flags);
+bool discard_trans_pmd(struct vm_area_struct *vma, unsigned long addr,
+		       struct folio *folio);
 
 vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
 vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 0f906dc6d280..670218f762c8 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -100,6 +100,8 @@ enum ttu_flags {
 					 * do a final flush if necessary */
 	TTU_RMAP_LOCKED		= 0x80,	/* do not grab rmap lock:
 					 * caller holds it */
+	TTU_LAZYFREE_THP	= 0x100, /* avoid splitting PMD-mapped THPs
+					 * that are marked as lazyfree. */
 };
 
 #ifdef CONFIG_MMU
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 824eff9211db..63de1445feab 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1810,6 +1810,107 @@ static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
 	mm_dec_nr_ptes(mm);
 }
 
+/**
+ * discard_trans_pmd - try to drop the PMD mapping of a clean lazyfree THP
+ * @vma: VMA in which @folio is mapped
+ * @addr: address of the mapping inside @vma
+ * @folio: locked, anonymous, non-swapbacked, PMD-mappable folio
+ *
+ * Clears the PMD, then re-checks the folio refcount and dirty state; if the
+ * folio was redirtied or gained unexpected references the PMD is restored.
+ *
+ * Return: true if the mapping was removed, false if it was left in place.
+ */
+bool discard_trans_pmd(struct vm_area_struct *vma, unsigned long addr,
+		       struct folio *folio)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct mmu_notifier_range range;
+	int ref_count, map_count;
+	struct mmu_gather tlb;
+	pmd_t *pmdp, orig_pmd;
+	struct page *page;
+	bool ret = false;
+	spinlock_t *ptl;
+
+	VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
+	VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio);
+	VM_WARN_ON_FOLIO(folio_test_swapbacked(folio), folio);
+	VM_WARN_ON_FOLIO(!folio_test_pmd_mappable(folio), folio);
+
+	/* Perform best-effort early checks before acquiring the PMD lock */
+	if (folio_ref_count(folio) != folio_mapcount(folio) + 1 ||
+	    folio_test_dirty(folio))
+		return false;
+
+	pmdp = mm_find_pmd(mm, addr);
+	if (unlikely(!pmdp))
+		return false;
+	if (pmd_dirty(*pmdp))
+		return false;
+
+	tlb_gather_mmu(&tlb, mm);
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm,
+				addr & HPAGE_PMD_MASK,
+				(addr & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE);
+	mmu_notifier_invalidate_range_start(&range);
+
+	ptl = pmd_lock(mm, pmdp);
+	orig_pmd = *pmdp;
+	if (unlikely(!pmd_present(orig_pmd) || !pmd_trans_huge(orig_pmd)))
+		goto out;
+
+	page = pmd_page(orig_pmd);
+	if (unlikely(page_folio(page) != folio))
+		goto out;
+
+	orig_pmd = pmdp_huge_get_and_clear(mm, addr, pmdp);
+	tlb_remove_pmd_tlb_entry(&tlb, pmdp, addr);
+
+	/*
+	 * Syncing against concurrent GUP-fast:
+	 * - clear PMD; barrier; read refcount
+	 * - inc refcount; barrier; read PMD
+	 */
+	smp_mb();
+
+	ref_count = folio_ref_count(folio);
+	map_count = folio_mapcount(folio);
+
+	/*
+	 * Order reads for folio refcount and dirty flag
+	 * (see comments in __remove_mapping()).
+	 */
+	smp_rmb();
+
+	/*
+	 * If the PMD or folio is redirtied at this point, or if there are
+	 * unexpected references, we will give up to discard this folio
+	 * and remap it.
+	 *
+	 * The only folio refs must be one from isolation plus the rmap(s).
+	 */
+	if (ref_count != map_count + 1 || folio_test_dirty(folio) ||
+	    pmd_dirty(orig_pmd)) {
+		set_pmd_at(mm, addr, pmdp, orig_pmd);
+		goto out;
+	}
+
+	folio_remove_rmap_pmd(folio, page, vma);
+	zap_deposited_table(mm, pmdp);
+	add_mm_counter(mm, MM_ANONPAGES, -HPAGE_PMD_NR);
+	folio_put(folio);
+	ret = true;
+
+out:
+	spin_unlock(ptl);
+	mmu_notifier_invalidate_range_end(&range);
+	/* Pair with tlb_gather_mmu(): flush the gathered PMD entry. */
+	tlb_finish_mmu(&tlb);
+
+	return ret;
+}
+
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		 pmd_t *pmd, unsigned long addr)
 {
diff --git a/mm/rmap.c b/mm/rmap.c
index 2608c40dffad..a7913a454028 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1631,6 +1631,12 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 	if (flags & TTU_SYNC)
 		pvmw.flags = PVMW_SYNC;
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (flags & TTU_LAZYFREE_THP)
+		if (discard_trans_pmd(vma, address, folio))
+			return true;
+#endif
+
 	if (flags & TTU_SPLIT_HUGE_PMD)
 		split_huge_pmd_address(vma, address, false, folio);
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 49bd94423961..e2686cc0c037 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1277,6 +1277,13 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 
 			if (folio_test_pmd_mappable(folio))
 				flags |= TTU_SPLIT_HUGE_PMD;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+			if (folio_test_anon(folio) && !was_swapbacked &&
+			    (flags & TTU_SPLIT_HUGE_PMD))
+				flags |= TTU_LAZYFREE_THP;
+#endif
+
 			/*
 			 * Without TTU_SYNC, try_to_unmap will only begin to
 			 * hold PTL from the first present PTE within a large

[v2,1/1] mm/vmscan: avoid split PMD-mapped THP during shrink_folio_list()

Commit Message

Comments

Patch