| Message ID | 20240924101654.1777697-3-dev.jain@arm.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | Do not shatter hugezeropage on wp-fault |
On 24.09.24 12:16, Dev Jain wrote:
> Introduce do_huge_zero_wp_pmd() to handle wp-fault on a hugezeropage and
> replace it with a PMD-mapped THP. Remember to flush TLB entry
> corresponding to the hugezeropage. In case of failure, fallback
> to splitting the PMD.
>
> Signed-off-by: Dev Jain <dev.jain@arm.com>

Nothing jumped at me and it looks much cleaner now.

Acked-by: David Hildenbrand <david@redhat.com>
On 2024/9/24 18:16, Dev Jain wrote:
> Introduce do_huge_zero_wp_pmd() to handle wp-fault on a hugezeropage and
> replace it with a PMD-mapped THP. Remember to flush TLB entry
> corresponding to the hugezeropage. In case of failure, fallback
> to splitting the PMD.
>
> Signed-off-by: Dev Jain <dev.jain@arm.com>
> ---
>  mm/huge_memory.c | 44 +++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 43 insertions(+), 1 deletion(-)
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index bdbf67c18f6c..fbb195bc2038 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -1790,6 +1790,41 @@ void huge_pmd_set_accessed(struct vm_fault *vmf)
>  	spin_unlock(vmf->ptl);
>  }
>
> +static vm_fault_t do_huge_zero_wp_pmd(struct vm_fault *vmf)
> +{
> +	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
> +	struct vm_area_struct *vma = vmf->vma;
> +	struct mmu_notifier_range range;
> +	struct folio *folio;
> +	vm_fault_t ret = 0;
> +
> +	folio = vma_alloc_anon_folio_pmd(vma, vmf->address);
> +	if (unlikely(!folio)) {
> +		ret = VM_FAULT_FALLBACK;

I'd like to return VM_FAULT_FALLBACK if you re-post.

Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>

> +		goto out;
> +	}
> +
> +	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, haddr,
> +				haddr + HPAGE_PMD_SIZE);
> +	mmu_notifier_invalidate_range_start(&range);
> +	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
> +	if (unlikely(!pmd_same(pmdp_get(vmf->pmd), vmf->orig_pmd)))
> +		goto release;
> +	ret = check_stable_address_space(vma->vm_mm);
> +	if (ret)
> +		goto release;
> +	(void)pmdp_huge_clear_flush(vma, haddr, vmf->pmd);
> +	map_anon_folio_pmd(folio, vmf->pmd, vma, haddr);
> +	goto unlock;
> +release:
> +	folio_put(folio);
> +unlock:
> +	spin_unlock(vmf->ptl);
> +	mmu_notifier_invalidate_range_end(&range);
> +out:
> +	return ret;
> +}
> +
>  vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
>  {
>  	const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
> @@ -1802,8 +1837,15 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
>  	vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd);
>  	VM_BUG_ON_VMA(!vma->anon_vma, vma);
>
> -	if (is_huge_zero_pmd(orig_pmd))
> +	if (is_huge_zero_pmd(orig_pmd)) {
> +		vm_fault_t ret = do_huge_zero_wp_pmd(vmf);
> +
> +		if (!(ret & VM_FAULT_FALLBACK))
> +			return ret;
> +
> +		/* Fallback to splitting PMD if THP cannot be allocated */
>  		goto fallback;
> +	}
>
>  	spin_lock(vmf->ptl);
On 9/24/24 18:43, Kefeng Wang wrote:
> On 2024/9/24 18:16, Dev Jain wrote:
>> Introduce do_huge_zero_wp_pmd() to handle wp-fault on a hugezeropage and
>> replace it with a PMD-mapped THP. Remember to flush TLB entry
>> corresponding to the hugezeropage. In case of failure, fallback
>> to splitting the PMD.
>>
>> Signed-off-by: Dev Jain <dev.jain@arm.com>
>> ---
>>  mm/huge_memory.c | 44 +++++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 43 insertions(+), 1 deletion(-)
>>
>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index bdbf67c18f6c..fbb195bc2038 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -1790,6 +1790,41 @@ void huge_pmd_set_accessed(struct vm_fault *vmf)
>>  	spin_unlock(vmf->ptl);
>>  }
>>
>> +static vm_fault_t do_huge_zero_wp_pmd(struct vm_fault *vmf)
>> +{
>> +	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
>> +	struct vm_area_struct *vma = vmf->vma;
>> +	struct mmu_notifier_range range;
>> +	struct folio *folio;
>> +	vm_fault_t ret = 0;
>> +
>> +	folio = vma_alloc_anon_folio_pmd(vma, vmf->address);
>> +	if (unlikely(!folio)) {
>> +		ret = VM_FAULT_FALLBACK;
>
> I'd like to return VM_FAULT_FALLBACK if you re-post.

That's cleaner. I need to keep my "goto" obsession in control...

>
> Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>

Thanks!

>
>> +		goto out;
>> +	}
>> +
>> +	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, haddr,
>> +				haddr + HPAGE_PMD_SIZE);
>> +	mmu_notifier_invalidate_range_start(&range);
>> +	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
>> +	if (unlikely(!pmd_same(pmdp_get(vmf->pmd), vmf->orig_pmd)))
>> +		goto release;
>> +	ret = check_stable_address_space(vma->vm_mm);
>> +	if (ret)
>> +		goto release;
>> +	(void)pmdp_huge_clear_flush(vma, haddr, vmf->pmd);
>> +	map_anon_folio_pmd(folio, vmf->pmd, vma, haddr);
>> +	goto unlock;
>> +release:
>> +	folio_put(folio);
>> +unlock:
>> +	spin_unlock(vmf->ptl);
>> +	mmu_notifier_invalidate_range_end(&range);
>> +out:
>> +	return ret;
>> +}
>> +
>>  vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
>>  {
>>  	const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
>> @@ -1802,8 +1837,15 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
>>  	vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd);
>>  	VM_BUG_ON_VMA(!vma->anon_vma, vma);
>>
>> -	if (is_huge_zero_pmd(orig_pmd))
>> +	if (is_huge_zero_pmd(orig_pmd)) {
>> +		vm_fault_t ret = do_huge_zero_wp_pmd(vmf);
>> +
>> +		if (!(ret & VM_FAULT_FALLBACK))
>> +			return ret;
>> +
>> +		/* Fallback to splitting PMD if THP cannot be allocated */
>>  		goto fallback;
>> +	}
>>
>>  	spin_lock(vmf->ptl);
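For illustration, the cleanup agreed on above would look roughly like the following. This is a sketch of the suggestion, not the re-posted patch: only the allocation-failure path of do_huge_zero_wp_pmd() changes, and the "out" label can then be dropped.

	/*
	 * Sketch of the suggested simplification (not the actual re-posted
	 * patch): fail fast with VM_FAULT_FALLBACK instead of setting ret
	 * and jumping to the "out" label, which then becomes unnecessary.
	 */
	folio = vma_alloc_anon_folio_pmd(vma, vmf->address);
	if (unlikely(!folio))
		return VM_FAULT_FALLBACK;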
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index bdbf67c18f6c..fbb195bc2038 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1790,6 +1790,41 @@ void huge_pmd_set_accessed(struct vm_fault *vmf)
 	spin_unlock(vmf->ptl);
 }
 
+static vm_fault_t do_huge_zero_wp_pmd(struct vm_fault *vmf)
+{
+	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
+	struct vm_area_struct *vma = vmf->vma;
+	struct mmu_notifier_range range;
+	struct folio *folio;
+	vm_fault_t ret = 0;
+
+	folio = vma_alloc_anon_folio_pmd(vma, vmf->address);
+	if (unlikely(!folio)) {
+		ret = VM_FAULT_FALLBACK;
+		goto out;
+	}
+
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, haddr,
+				haddr + HPAGE_PMD_SIZE);
+	mmu_notifier_invalidate_range_start(&range);
+	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+	if (unlikely(!pmd_same(pmdp_get(vmf->pmd), vmf->orig_pmd)))
+		goto release;
+	ret = check_stable_address_space(vma->vm_mm);
+	if (ret)
+		goto release;
+	(void)pmdp_huge_clear_flush(vma, haddr, vmf->pmd);
+	map_anon_folio_pmd(folio, vmf->pmd, vma, haddr);
+	goto unlock;
+release:
+	folio_put(folio);
+unlock:
+	spin_unlock(vmf->ptl);
+	mmu_notifier_invalidate_range_end(&range);
+out:
+	return ret;
+}
+
 vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
 {
 	const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
@@ -1802,8 +1837,15 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
 	vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd);
 	VM_BUG_ON_VMA(!vma->anon_vma, vma);
 
-	if (is_huge_zero_pmd(orig_pmd))
+	if (is_huge_zero_pmd(orig_pmd)) {
+		vm_fault_t ret = do_huge_zero_wp_pmd(vmf);
+
+		if (!(ret & VM_FAULT_FALLBACK))
+			return ret;
+
+		/* Fallback to splitting PMD if THP cannot be allocated */
 		goto fallback;
+	}
 
 	spin_lock(vmf->ptl);
Introduce do_huge_zero_wp_pmd() to handle wp-fault on a hugezeropage and
replace it with a PMD-mapped THP. Remember to flush TLB entry
corresponding to the hugezeropage. In case of failure, fallback
to splitting the PMD.

Signed-off-by: Dev Jain <dev.jain@arm.com>
---
 mm/huge_memory.c | 44 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)
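As context for how this path is reached, a minimal userspace sketch is shown below. It is not part of the patch and makes assumptions: THP and the huge zero page are enabled (transparent_hugepage/enabled and use_zero_page), the page size is 4K with a 2M PMD, and the first read fault on a PMD-aligned region actually maps the huge zero page. The subsequent write then raises the wp-fault that this series handles by installing a PMD-mapped THP instead of splitting the PMD.

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

#define PMD_SZ (2UL * 1024 * 1024)	/* PMD size on arm64/x86_64 with 4K pages */

int main(void)
{
	/* Illustrative only: over-allocate so a PMD-aligned 2M window exists inside. */
	char *buf = mmap(NULL, 2 * PMD_SZ, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	char *p = (char *)(((unsigned long)buf + PMD_SZ - 1) & ~(PMD_SZ - 1));

	/* Hint THP for the aligned window. */
	if (madvise(p, PMD_SZ, MADV_HUGEPAGE))
		perror("madvise");

	volatile char c = p[0];	/* read fault: may install the PMD-mapped huge zero page */
	(void)c;
	p[0] = 1;		/* write fault: wp-fault on the hugezeropage, the path changed here */

	munmap(buf, 2 * PMD_SZ);
	return 0;
}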