| Message ID | 20230314033734.481904-3-zhangpeng362@huawei.com (mailing list archive) |
| --- | --- |
| State | New |
| Series | userfaultfd: convert userfaultfd functions to use folios |
On Tue, Mar 14, 2023 at 03:37:33AM +0000, Peng Zhang wrote:
> +++ b/include/linux/mm.h
> @@ -3546,9 +3546,8 @@ extern void copy_user_huge_page(struct page *dst, struct page *src,
>  				unsigned long addr_hint,
>  				struct vm_area_struct *vma,
>  				unsigned int pages_per_huge_page);
> -extern long copy_huge_page_from_user(struct page *dst_page,
> +extern long copy_large_folio_from_user(struct folio *dst_folio,

You can drop the 'extern'.

> +++ b/mm/memory.c
> @@ -5769,26 +5769,28 @@ void copy_user_huge_page(struct page *dst, struct page *src,
>  	process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg);
>  }
>
> -long copy_huge_page_from_user(struct page *dst_page,
> +long copy_large_folio_from_user(struct folio *dst_folio,
>  				const void __user *usr_src,
> -				unsigned int pages_per_huge_page,
>  				bool allow_pagefault)
>  {
>  	void *page_kaddr;
>  	unsigned long i, rc = 0;
> -	unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
> +	unsigned int nr_pages = folio_nr_pages(dst_folio);
> +	unsigned long ret_val = nr_pages * PAGE_SIZE;
>  	struct page *subpage;
> +	struct folio *inner_folio;

What is an 'inner folio'?

> -	for (i = 0; i < pages_per_huge_page; i++) {
> -		subpage = nth_page(dst_page, i);
> +	for (i = 0; i < nr_pages; i++) {
> +		subpage = folio_page(dst_folio, i);
> +		inner_folio = page_folio(subpage);
>  		if (allow_pagefault)
> -			page_kaddr = kmap(subpage);
> +			page_kaddr = kmap_local_folio(inner_folio, 0);

This doesn't do what you think it does. Did you test this?

>  		else
>  			page_kaddr = kmap_atomic(subpage);

Pretty sure all this should be converted to kmap_local and the atomic bits should go away.

>  		rc = copy_from_user(page_kaddr,
>  				usr_src + i * PAGE_SIZE, PAGE_SIZE);
>  		if (allow_pagefault)
> -			kunmap(subpage);
> +			kunmap_local(page_kaddr);
>  		else
>  			kunmap_atomic(page_kaddr);
>
> @@ -5796,7 +5798,7 @@ long copy_huge_page_from_user(struct page *dst_page,
>  		if (rc)
>  			break;
>
> -		flush_dcache_page(subpage);
> +		flush_dcache_folio(inner_folio);

The flush should probably be pulled outside the loop.

> +		err = copy_large_folio_from_user(folio,
> +				(const void __user *) src_addr, true);

I wonder if this shouldn't be 'copy_folio_from_user()'. after all, it'll work for any size folio, right?
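For context on the kmap_local_folio() remark above: page_folio() on any subpage of a large folio returns the head folio, so an offset of 0 always maps the folio's first page, no matter which iteration the loop is on. A minimal sketch of the distinction (illustration only, not code from this thread; the helper name is made up):

```c
#include <linux/highmem.h>
#include <linux/mm.h>

/*
 * Hypothetical helper, for illustration only: map the i-th subpage of a
 * large folio with kmap_local_*().  Passing offset 0, as the patch does
 * via page_folio(subpage), always maps the folio's first page instead.
 */
static void *map_subpage_for_copy(struct folio *dst_folio, unsigned long i)
{
	/* kmap_local_folio() takes a byte offset within the folio. */
	return kmap_local_folio(dst_folio, i * PAGE_SIZE);
	/* Equivalent: kmap_local_page(folio_page(dst_folio, i)); */
}
```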
On 2023/3/14 16:31, Matthew Wilcox wrote:
> On Tue, Mar 14, 2023 at 03:37:33AM +0000, Peng Zhang wrote:
>> +++ b/include/linux/mm.h
>> @@ -3546,9 +3546,8 @@ extern void copy_user_huge_page(struct page *dst, struct page *src,
>>  				unsigned long addr_hint,
>>  				struct vm_area_struct *vma,
>>  				unsigned int pages_per_huge_page);
>> -extern long copy_huge_page_from_user(struct page *dst_page,
>> +extern long copy_large_folio_from_user(struct folio *dst_folio,
> You can drop the 'extern'.

Got it.

>> +++ b/mm/memory.c
>> @@ -5769,26 +5769,28 @@ void copy_user_huge_page(struct page *dst, struct page *src,
>>  	process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg);
>>  }
>>
>> -long copy_huge_page_from_user(struct page *dst_page,
>> +long copy_large_folio_from_user(struct folio *dst_folio,
>>  				const void __user *usr_src,
>> -				unsigned int pages_per_huge_page,
>>  				bool allow_pagefault)
>>  {
>>  	void *page_kaddr;
>>  	unsigned long i, rc = 0;
>> -	unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
>> +	unsigned int nr_pages = folio_nr_pages(dst_folio);
>> +	unsigned long ret_val = nr_pages * PAGE_SIZE;
>>  	struct page *subpage;
>> +	struct folio *inner_folio;
> What is an 'inner folio'?
>
>> -	for (i = 0; i < pages_per_huge_page; i++) {
>> -		subpage = nth_page(dst_page, i);
>> +	for (i = 0; i < nr_pages; i++) {
>> +		subpage = folio_page(dst_folio, i);
>> +		inner_folio = page_folio(subpage);
>>  		if (allow_pagefault)
>> -			page_kaddr = kmap(subpage);
>> +			page_kaddr = kmap_local_folio(inner_folio, 0);
> This doesn't do what you think it does. Did you test this?
>
>>  		else
>>  			page_kaddr = kmap_atomic(subpage);
> Pretty sure all this should be converted to kmap_local and the atomic bits should go away.
>
>>  		rc = copy_from_user(page_kaddr,
>>  				usr_src + i * PAGE_SIZE, PAGE_SIZE);
>>  		if (allow_pagefault)
>> -			kunmap(subpage);
>> +			kunmap_local(page_kaddr);
>>  		else
>>  			kunmap_atomic(page_kaddr);
>>
>> @@ -5796,7 +5798,7 @@ long copy_huge_page_from_user(struct page *dst_page,
>>  		if (rc)
>>  			break;
>>
>> -		flush_dcache_page(subpage);
>> +		flush_dcache_folio(inner_folio);
> The flush should probably be pulled outside the loop.
>
>> +		err = copy_large_folio_from_user(folio,
>> +				(const void __user *) src_addr, true);
> I wonder if this shouldn't be 'copy_folio_from_user()'. after all, it'll work for any size folio, right?

Thanks for your review.

I'll rename copy_large_folio_from_user() to copy_folio_from_user().
I'll delete the inner_folio.
kmap() and kmap_atomic() will be converted to the following code.

	page_kaddr = kmap_local_page(subpage);
	if (!allow_pagefault)
		pagefault_disable();
	rc = copy_from_user(page_kaddr,
			usr_src + i * PAGE_SIZE, PAGE_SIZE);
	if (!allow_pagefault)
		pagefault_enable();
	kunmap_local(page_kaddr);

flush_dcache_folio() will be placed outside the loop.

I'll fix all this in a v2 of this patch series.

Thanks,
Peng.
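Putting the feedback together, a rough sketch of what the reworked helper could look like in v2 (renamed to copy_folio_from_user(), kmap_local_page() with pagefault_disable()/pagefault_enable() replacing kmap()/kmap_atomic(), and the dcache flush moved outside the loop). This is an illustration derived from the discussion above, not the posted v2:

```c
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/uaccess.h>

/* Sketch only: copy user data into a folio of any size. */
long copy_folio_from_user(struct folio *dst_folio,
			  const void __user *usr_src,
			  bool allow_pagefault)
{
	unsigned int nr_pages = folio_nr_pages(dst_folio);
	unsigned long ret_val = nr_pages * PAGE_SIZE;
	void *page_kaddr;
	unsigned long i, rc = 0;

	for (i = 0; i < nr_pages; i++) {
		/* Temporary local mapping of subpage i. */
		page_kaddr = kmap_local_page(folio_page(dst_folio, i));
		if (!allow_pagefault)
			pagefault_disable();
		rc = copy_from_user(page_kaddr,
				    usr_src + i * PAGE_SIZE, PAGE_SIZE);
		if (!allow_pagefault)
			pagefault_enable();
		kunmap_local(page_kaddr);

		ret_val -= (PAGE_SIZE - rc);
		if (rc)
			break;

		cond_resched();
	}
	/* One flush for the whole folio, outside the loop. */
	flush_dcache_folio(dst_folio);
	return ret_val;
}
```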
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 7c977d234aba..030252ce51bd 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -163,7 +163,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
 				unsigned long dst_addr,
 				unsigned long src_addr,
 				enum mcopy_atomic_mode mode,
-				struct page **pagep,
+				struct folio **foliop,
 				bool wp_copy);
 #endif /* CONFIG_USERFAULTFD */
 bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
@@ -399,7 +399,7 @@ static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 						unsigned long dst_addr,
 						unsigned long src_addr,
 						enum mcopy_atomic_mode mode,
-						struct page **pagep,
+						struct folio **foliop,
 						bool wp_copy)
 {
 	BUG();
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1f79667824eb..5eff770adbaa 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3546,9 +3546,8 @@ extern void copy_user_huge_page(struct page *dst, struct page *src,
 				unsigned long addr_hint,
 				struct vm_area_struct *vma,
 				unsigned int pages_per_huge_page);
-extern long copy_huge_page_from_user(struct page *dst_page,
+extern long copy_large_folio_from_user(struct folio *dst_folio,
 				const void __user *usr_src,
-				unsigned int pages_per_huge_page,
 				bool allow_pagefault);
 
 /**
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 07abcb6eb203..20ce2b4e32b1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6163,7 +6163,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 			    unsigned long dst_addr,
 			    unsigned long src_addr,
 			    enum mcopy_atomic_mode mode,
-			    struct page **pagep,
+			    struct folio **foliop,
 			    bool wp_copy)
 {
 	bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE);
@@ -6185,7 +6185,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 		if (!folio)
 			goto out;
 		folio_in_pagecache = true;
-	} else if (!*pagep) {
+	} else if (!*foliop) {
 		/* If a page already exists, then it's UFFDIO_COPY for
 		 * a non-missing case. Return -EEXIST.
 		 */
@@ -6201,9 +6201,8 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 			goto out;
 		}
 
-		ret = copy_huge_page_from_user(&folio->page,
-						(const void __user *) src_addr,
-						pages_per_huge_page(h), false);
+		ret = copy_large_folio_from_user(folio,
+				(const void __user *) src_addr, false);
 
 		/* fallback to copy_from_user outside mmap_lock */
 		if (unlikely(ret)) {
@@ -6222,7 +6221,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 			ret = -ENOMEM;
 			goto out;
 		}
-		*pagep = &folio->page;
+		*foliop = folio;
 		/* Set the outparam pagep and return to the caller to
 		 * copy the contents outside the lock. Don't free the
 		 * page.
@@ -6232,23 +6231,23 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 	} else {
 		if (vm_shared &&
 		    hugetlbfs_pagecache_present(h, dst_vma, dst_addr)) {
-			put_page(*pagep);
+			folio_put(*foliop);
 			ret = -EEXIST;
-			*pagep = NULL;
+			*foliop = NULL;
 			goto out;
 		}
 
 		folio = alloc_hugetlb_folio(dst_vma, dst_addr, 0);
 		if (IS_ERR(folio)) {
-			put_page(*pagep);
+			folio_put(*foliop);
 			ret = -ENOMEM;
-			*pagep = NULL;
+			*foliop = NULL;
 			goto out;
 		}
-		copy_user_huge_page(&folio->page, *pagep, dst_addr, dst_vma,
+		copy_user_huge_page(&folio->page, &((*foliop)->page), dst_addr, dst_vma,
 				    pages_per_huge_page(h));
-		put_page(*pagep);
-		*pagep = NULL;
+		folio_put(*foliop);
+		*foliop = NULL;
 	}
 
 	/*
diff --git a/mm/memory.c b/mm/memory.c
index f456f3b5049c..737794b7a19f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5769,26 +5769,28 @@ void copy_user_huge_page(struct page *dst, struct page *src,
 	process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg);
 }
 
-long copy_huge_page_from_user(struct page *dst_page,
+long copy_large_folio_from_user(struct folio *dst_folio,
 				const void __user *usr_src,
-				unsigned int pages_per_huge_page,
 				bool allow_pagefault)
 {
 	void *page_kaddr;
 	unsigned long i, rc = 0;
-	unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
+	unsigned int nr_pages = folio_nr_pages(dst_folio);
+	unsigned long ret_val = nr_pages * PAGE_SIZE;
 	struct page *subpage;
+	struct folio *inner_folio;
 
-	for (i = 0; i < pages_per_huge_page; i++) {
-		subpage = nth_page(dst_page, i);
+	for (i = 0; i < nr_pages; i++) {
+		subpage = folio_page(dst_folio, i);
+		inner_folio = page_folio(subpage);
 		if (allow_pagefault)
-			page_kaddr = kmap(subpage);
+			page_kaddr = kmap_local_folio(inner_folio, 0);
 		else
 			page_kaddr = kmap_atomic(subpage);
 		rc = copy_from_user(page_kaddr,
 				usr_src + i * PAGE_SIZE, PAGE_SIZE);
 		if (allow_pagefault)
-			kunmap(subpage);
+			kunmap_local(page_kaddr);
 		else
 			kunmap_atomic(page_kaddr);
 
@@ -5796,7 +5798,7 @@ long copy_huge_page_from_user(struct page *dst_page,
 		if (rc)
 			break;
 
-		flush_dcache_page(subpage);
+		flush_dcache_folio(inner_folio);
 
 		cond_resched();
 	}
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 263191c37fb5..d7435a1a9d4b 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -324,7 +324,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 	pte_t *dst_pte;
 	unsigned long src_addr, dst_addr;
 	long copied;
-	struct page *page;
+	struct folio *folio;
 	unsigned long vma_hpagesize;
 	pgoff_t idx;
 	u32 hash;
@@ -344,7 +344,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 	src_addr = src_start;
 	dst_addr = dst_start;
 	copied = 0;
-	page = NULL;
+	folio = NULL;
 	vma_hpagesize = vma_kernel_pagesize(dst_vma);
 
 	/*
@@ -413,7 +413,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 		}
 
 		err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
-					       dst_addr, src_addr, mode, &page,
+					       dst_addr, src_addr, mode, &folio,
 					       wp_copy);
 
 		hugetlb_vma_unlock_read(dst_vma);
@@ -423,12 +423,10 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 
 		if (unlikely(err == -ENOENT)) {
 			mmap_read_unlock(dst_mm);
-			BUG_ON(!page);
+			BUG_ON(!folio);
 
-			err = copy_huge_page_from_user(page,
-						(const void __user *)src_addr,
-						vma_hpagesize / PAGE_SIZE,
-						true);
+			err = copy_large_folio_from_user(folio,
+					(const void __user *) src_addr, true);
 			if (unlikely(err)) {
 				err = -EFAULT;
 				goto out;
@@ -438,7 +436,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 			dst_vma = NULL;
 			goto retry;
 		} else
-			BUG_ON(page);
+			BUG_ON(folio);
 
 		if (!err) {
 			dst_addr += vma_hpagesize;
@@ -455,8 +453,8 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 out_unlock:
 	mmap_read_unlock(dst_mm);
 out:
-	if (page)
-		put_page(page);
+	if (folio)
+		folio_put(folio);
 	BUG_ON(copied < 0);
 	BUG_ON(err > 0);
 	BUG_ON(!copied && !err);