diff mbox series

[2/3] userfaultfd: convert __mcopy_atomic_hugetlb() to use a folio

Message ID 20230314033734.481904-3-zhangpeng362@huawei.com (mailing list archive)
State New
Headers show
Series userfaultfd: convert userfaultfd functions to use folios | expand

Commit Message

Peng Zhang March 14, 2023, 3:37 a.m. UTC
From: ZhangPeng <zhangpeng362@huawei.com>

Change copy_huge_page_from_user() to copy_large_folio_from_user().
Convert hugetlb_mcopy_atomic_pte() and __mcopy_atomic_hugetlb() to use a
folio.

Signed-off-by: ZhangPeng <zhangpeng362@huawei.com>
---
 include/linux/hugetlb.h |  4 ++--
 include/linux/mm.h      |  3 +--
 mm/hugetlb.c            | 25 ++++++++++++-------------
 mm/memory.c             | 18 ++++++++++--------
 mm/userfaultfd.c        | 20 +++++++++-----------
 5 files changed, 34 insertions(+), 36 deletions(-)

Comments

Matthew Wilcox March 14, 2023, 8:31 a.m. UTC | #1
On Tue, Mar 14, 2023 at 03:37:33AM +0000, Peng Zhang wrote:
> +++ b/include/linux/mm.h
> @@ -3546,9 +3546,8 @@ extern void copy_user_huge_page(struct page *dst, struct page *src,
>  				unsigned long addr_hint,
>  				struct vm_area_struct *vma,
>  				unsigned int pages_per_huge_page);
> -extern long copy_huge_page_from_user(struct page *dst_page,
> +extern long copy_large_folio_from_user(struct folio *dst_folio,

You can drop the 'extern'.

> +++ b/mm/memory.c
> @@ -5769,26 +5769,28 @@ void copy_user_huge_page(struct page *dst, struct page *src,
>  	process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg);
>  }
>  
> -long copy_huge_page_from_user(struct page *dst_page,
> +long copy_large_folio_from_user(struct folio *dst_folio,
>  				const void __user *usr_src,
> -				unsigned int pages_per_huge_page,
>  				bool allow_pagefault)
>  {
>  	void *page_kaddr;
>  	unsigned long i, rc = 0;
> -	unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
> +	unsigned int nr_pages = folio_nr_pages(dst_folio);
> +	unsigned long ret_val = nr_pages * PAGE_SIZE;
>  	struct page *subpage;
> +	struct folio *inner_folio;

What is an 'inner folio'?

> -	for (i = 0; i < pages_per_huge_page; i++) {
> -		subpage = nth_page(dst_page, i);
> +	for (i = 0; i < nr_pages; i++) {
> +		subpage = folio_page(dst_folio, i);
> +		inner_folio = page_folio(subpage);
>  		if (allow_pagefault)
> -			page_kaddr = kmap(subpage);
> +			page_kaddr = kmap_local_folio(inner_folio, 0);

This doesn't do what you think it does.  Did you test this?

>  		else
>  			page_kaddr = kmap_atomic(subpage);

Pretty sure all this should be converted to kmap_local and the atomic
bits should go away.

>  		rc = copy_from_user(page_kaddr,
>  				usr_src + i * PAGE_SIZE, PAGE_SIZE);
>  		if (allow_pagefault)
> -			kunmap(subpage);
> +			kunmap_local(page_kaddr);
>  		else
>  			kunmap_atomic(page_kaddr);
>  
> @@ -5796,7 +5798,7 @@ long copy_huge_page_from_user(struct page *dst_page,
>  		if (rc)
>  			break;
>  
> -		flush_dcache_page(subpage);
> +		flush_dcache_folio(inner_folio);

The flush should probably be pulled outside the loop.

> +			err = copy_large_folio_from_user(folio,
> +						(const void __user *) src_addr, true);

I wonder if this shouldn't be 'copy_folio_from_user()'.  after all,
it'll work for any size folio, right?
Peng Zhang March 14, 2023, 11:47 a.m. UTC | #2
On 2023/3/14 16:31, Matthew Wilcox wrote:

> On Tue, Mar 14, 2023 at 03:37:33AM +0000, Peng Zhang wrote:
>> +++ b/include/linux/mm.h
>> @@ -3546,9 +3546,8 @@ extern void copy_user_huge_page(struct page *dst, struct page *src,
>>   				unsigned long addr_hint,
>>   				struct vm_area_struct *vma,
>>   				unsigned int pages_per_huge_page);
>> -extern long copy_huge_page_from_user(struct page *dst_page,
>> +extern long copy_large_folio_from_user(struct folio *dst_folio,
> You can drop the 'extern'.

Got it.

>> +++ b/mm/memory.c
>> @@ -5769,26 +5769,28 @@ void copy_user_huge_page(struct page *dst, struct page *src,
>>   	process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg);
>>   }
>>   
>> -long copy_huge_page_from_user(struct page *dst_page,
>> +long copy_large_folio_from_user(struct folio *dst_folio,
>>   				const void __user *usr_src,
>> -				unsigned int pages_per_huge_page,
>>   				bool allow_pagefault)
>>   {
>>   	void *page_kaddr;
>>   	unsigned long i, rc = 0;
>> -	unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
>> +	unsigned int nr_pages = folio_nr_pages(dst_folio);
>> +	unsigned long ret_val = nr_pages * PAGE_SIZE;
>>   	struct page *subpage;
>> +	struct folio *inner_folio;
> What is an 'inner folio'?
>
>> -	for (i = 0; i < pages_per_huge_page; i++) {
>> -		subpage = nth_page(dst_page, i);
>> +	for (i = 0; i < nr_pages; i++) {
>> +		subpage = folio_page(dst_folio, i);
>> +		inner_folio = page_folio(subpage);
>>   		if (allow_pagefault)
>> -			page_kaddr = kmap(subpage);
>> +			page_kaddr = kmap_local_folio(inner_folio, 0);
> This doesn't do what you think it does.  Did you test this?
>
>>   		else
>>   			page_kaddr = kmap_atomic(subpage);
> Pretty sure all this should be converted to kmap_local and the atomic
> bits should go away.
>
>>   		rc = copy_from_user(page_kaddr,
>>   				usr_src + i * PAGE_SIZE, PAGE_SIZE);
>>   		if (allow_pagefault)
>> -			kunmap(subpage);
>> +			kunmap_local(page_kaddr);
>>   		else
>>   			kunmap_atomic(page_kaddr);
>>   
>> @@ -5796,7 +5798,7 @@ long copy_huge_page_from_user(struct page *dst_page,
>>   		if (rc)
>>   			break;
>>   
>> -		flush_dcache_page(subpage);
>> +		flush_dcache_folio(inner_folio);
> The flush should probably be pulled outside the loop.
>
>> +			err = copy_large_folio_from_user(folio,
>> +						(const void __user *) src_addr, true);
> I wonder if this shouldn't be 'copy_folio_from_user()'.  after all,
> it'll work for any size folio, right?

Thanks for your review.

I'll rename copy_large_folio_from_user() to copy_folio_from_user().
I'll delete the inner_folio. kmap() and kmap_atomic() will be
converted to the following code.

	page_kaddr = kmap_local_page(subpage);
	if (!allow_pagefault)
		pagefault_disable();
	rc = copy_from_user(page_kaddr,
			usr_src + i * PAGE_SIZE, PAGE_SIZE);
	if (!allow_pagefault)
		pagefault_enable();
	kunmap_local(page_kaddr);

flush_dcache_folio() will be placed outside the loop.

I'll fix all this in a v2 of this patch series.


Thanks,
Peng.
diff mbox series

Patch

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 7c977d234aba..030252ce51bd 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -163,7 +163,7 @@  int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
 				unsigned long dst_addr,
 				unsigned long src_addr,
 				enum mcopy_atomic_mode mode,
-				struct page **pagep,
+				struct folio **foliop,
 				bool wp_copy);
 #endif /* CONFIG_USERFAULTFD */
 bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
@@ -399,7 +399,7 @@  static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 						unsigned long dst_addr,
 						unsigned long src_addr,
 						enum mcopy_atomic_mode mode,
-						struct page **pagep,
+						struct folio **foliop,
 						bool wp_copy)
 {
 	BUG();
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1f79667824eb..5eff770adbaa 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3546,9 +3546,8 @@  extern void copy_user_huge_page(struct page *dst, struct page *src,
 				unsigned long addr_hint,
 				struct vm_area_struct *vma,
 				unsigned int pages_per_huge_page);
-extern long copy_huge_page_from_user(struct page *dst_page,
+extern long copy_large_folio_from_user(struct folio *dst_folio,
 				const void __user *usr_src,
-				unsigned int pages_per_huge_page,
 				bool allow_pagefault);
 
 /**
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 07abcb6eb203..20ce2b4e32b1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6163,7 +6163,7 @@  int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 			    unsigned long dst_addr,
 			    unsigned long src_addr,
 			    enum mcopy_atomic_mode mode,
-			    struct page **pagep,
+			    struct folio **foliop,
 			    bool wp_copy)
 {
 	bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE);
@@ -6185,7 +6185,7 @@  int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 		if (!folio)
 			goto out;
 		folio_in_pagecache = true;
-	} else if (!*pagep) {
+	} else if (!*foliop) {
 		/* If a page already exists, then it's UFFDIO_COPY for
 		 * a non-missing case. Return -EEXIST.
 		 */
@@ -6201,9 +6201,8 @@  int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 			goto out;
 		}
 
-		ret = copy_huge_page_from_user(&folio->page,
-						(const void __user *) src_addr,
-						pages_per_huge_page(h), false);
+		ret = copy_large_folio_from_user(folio,
+						(const void __user *) src_addr, false);
 
 		/* fallback to copy_from_user outside mmap_lock */
 		if (unlikely(ret)) {
@@ -6222,7 +6221,7 @@  int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 				ret = -ENOMEM;
 				goto out;
 			}
-			*pagep = &folio->page;
+			*foliop = folio;
 			/* Set the outparam pagep and return to the caller to
 			 * copy the contents outside the lock. Don't free the
 			 * page.
@@ -6232,23 +6231,23 @@  int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 	} else {
 		if (vm_shared &&
 		    hugetlbfs_pagecache_present(h, dst_vma, dst_addr)) {
-			put_page(*pagep);
+			folio_put(*foliop);
 			ret = -EEXIST;
-			*pagep = NULL;
+			*foliop = NULL;
 			goto out;
 		}
 
 		folio = alloc_hugetlb_folio(dst_vma, dst_addr, 0);
 		if (IS_ERR(folio)) {
-			put_page(*pagep);
+			folio_put(*foliop);
 			ret = -ENOMEM;
-			*pagep = NULL;
+			*foliop = NULL;
 			goto out;
 		}
-		copy_user_huge_page(&folio->page, *pagep, dst_addr, dst_vma,
+		copy_user_huge_page(&folio->page, &((*foliop)->page), dst_addr, dst_vma,
 				    pages_per_huge_page(h));
-		put_page(*pagep);
-		*pagep = NULL;
+		folio_put(*foliop);
+		*foliop = NULL;
 	}
 
 	/*
diff --git a/mm/memory.c b/mm/memory.c
index f456f3b5049c..737794b7a19f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5769,26 +5769,28 @@  void copy_user_huge_page(struct page *dst, struct page *src,
 	process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg);
 }
 
-long copy_huge_page_from_user(struct page *dst_page,
+long copy_large_folio_from_user(struct folio *dst_folio,
 				const void __user *usr_src,
-				unsigned int pages_per_huge_page,
 				bool allow_pagefault)
 {
 	void *page_kaddr;
 	unsigned long i, rc = 0;
-	unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
+	unsigned int nr_pages = folio_nr_pages(dst_folio);
+	unsigned long ret_val = nr_pages * PAGE_SIZE;
 	struct page *subpage;
+	struct folio *inner_folio;
 
-	for (i = 0; i < pages_per_huge_page; i++) {
-		subpage = nth_page(dst_page, i);
+	for (i = 0; i < nr_pages; i++) {
+		subpage = folio_page(dst_folio, i);
+		inner_folio = page_folio(subpage);
 		if (allow_pagefault)
-			page_kaddr = kmap(subpage);
+			page_kaddr = kmap_local_folio(inner_folio, 0);
 		else
 			page_kaddr = kmap_atomic(subpage);
 		rc = copy_from_user(page_kaddr,
 				usr_src + i * PAGE_SIZE, PAGE_SIZE);
 		if (allow_pagefault)
-			kunmap(subpage);
+			kunmap_local(page_kaddr);
 		else
 			kunmap_atomic(page_kaddr);
 
@@ -5796,7 +5798,7 @@  long copy_huge_page_from_user(struct page *dst_page,
 		if (rc)
 			break;
 
-		flush_dcache_page(subpage);
+		flush_dcache_folio(inner_folio);
 
 		cond_resched();
 	}
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 263191c37fb5..d7435a1a9d4b 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -324,7 +324,7 @@  static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 	pte_t *dst_pte;
 	unsigned long src_addr, dst_addr;
 	long copied;
-	struct page *page;
+	struct folio *folio;
 	unsigned long vma_hpagesize;
 	pgoff_t idx;
 	u32 hash;
@@ -344,7 +344,7 @@  static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 	src_addr = src_start;
 	dst_addr = dst_start;
 	copied = 0;
-	page = NULL;
+	folio = NULL;
 	vma_hpagesize = vma_kernel_pagesize(dst_vma);
 
 	/*
@@ -413,7 +413,7 @@  static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 		}
 
 		err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
-					       dst_addr, src_addr, mode, &page,
+					       dst_addr, src_addr, mode, &folio,
 					       wp_copy);
 
 		hugetlb_vma_unlock_read(dst_vma);
@@ -423,12 +423,10 @@  static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 
 		if (unlikely(err == -ENOENT)) {
 			mmap_read_unlock(dst_mm);
-			BUG_ON(!page);
+			BUG_ON(!folio);
 
-			err = copy_huge_page_from_user(page,
-						(const void __user *)src_addr,
-						vma_hpagesize / PAGE_SIZE,
-						true);
+			err = copy_large_folio_from_user(folio,
+						(const void __user *) src_addr, true);
 			if (unlikely(err)) {
 				err = -EFAULT;
 				goto out;
@@ -438,7 +436,7 @@  static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 			dst_vma = NULL;
 			goto retry;
 		} else
-			BUG_ON(page);
+			BUG_ON(folio);
 
 		if (!err) {
 			dst_addr += vma_hpagesize;
@@ -455,8 +453,8 @@  static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 out_unlock:
 	mmap_read_unlock(dst_mm);
 out:
-	if (page)
-		put_page(page);
+	if (folio)
+		folio_put(folio);
 	BUG_ON(copied < 0);
 	BUG_ON(err > 0);
 	BUG_ON(!copied && !err);