diff mbox series

[3/8] mm: Return the address from page_mapped_in_vma()

Message ID 20240229212036.2160900-4-willy@infradead.org (mailing list archive)
State New
Headers show
Series Some cleanups for memory-failure | expand

Commit Message

Matthew Wilcox Feb. 29, 2024, 9:20 p.m. UTC
The only user of this function calls page_address_in_vma() immediately
after page_mapped_in_vma() calculates it and uses it to return true/false.
Return the address instead, allowing memory-failure to skip the call
to page_address_in_vma().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/rmap.h |  2 +-
 mm/memory-failure.c  | 22 ++++++++++++++--------
 mm/page_vma_mapped.c | 14 +++++++-------
 3 files changed, 22 insertions(+), 16 deletions(-)

Comments

Miaohe Lin March 4, 2024, 12:31 p.m. UTC | #1
On 2024/3/1 5:20, Matthew Wilcox (Oracle) wrote:
> The only user of this function calls page_address_in_vma() immediately
> after page_mapped_in_vma() calculates it and uses it to return true/false.
> Return the address instead, allowing memory-failure to skip the call
> to page_address_in_vma().
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  include/linux/rmap.h |  2 +-
>  mm/memory-failure.c  | 22 ++++++++++++++--------
>  mm/page_vma_mapped.c | 14 +++++++-------
>  3 files changed, 22 insertions(+), 16 deletions(-)
> 
> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> index b7944a833668..ba027a4d9abf 100644
> --- a/include/linux/rmap.h
> +++ b/include/linux/rmap.h
> @@ -698,7 +698,7 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
>  
>  void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked);
>  
> -int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
> +unsigned long page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
>  
>  /*
>   * rmap_walk_control: To control rmap traversing for specific needs
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 7f8473c08ae3..40a8964954e5 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -462,10 +462,11 @@ static void __add_to_kill(struct task_struct *tsk, struct page *p,
>  }
>  
>  static void add_to_kill_anon_file(struct task_struct *tsk, struct page *p,
> -				  struct vm_area_struct *vma,
> -				  struct list_head *to_kill)
> +		struct vm_area_struct *vma, struct list_head *to_kill,
> +		unsigned long addr)
>  {
> -	unsigned long addr = page_address_in_vma(p, vma);
> +	if (addr == -EFAULT)
> +		return;

IIUC, this patch will change the semantics slightly. There is one corner case where page_mapped_in_vma()
returns true but page_address_in_vma() returns -EFAULT if mremap is done between the two calls. In that case,
SIGKILL will be sent to the user. But with this patch applied, SIGBUS will be sent to the user with the address
from before the mremap. Or am I missing something?

Thanks.

>  	__add_to_kill(tsk, p, vma, to_kill, addr);
>  }
>  
> @@ -609,12 +610,13 @@ static void collect_procs_anon(struct folio *folio, struct page *page,
>  			continue;
>  		anon_vma_interval_tree_foreach(vmac, &av->rb_root,
>  					       pgoff, pgoff) {
> +			unsigned long addr;
> +
>  			vma = vmac->vma;
>  			if (vma->vm_mm != t->mm)
>  				continue;
> -			if (!page_mapped_in_vma(page, vma))
> -				continue;
> -			add_to_kill_anon_file(t, page, vma, to_kill);
> +			addr = page_mapped_in_vma(page, vma);
> +			add_to_kill_anon_file(t, page, vma, to_kill, addr);
>  		}
>  	}
>  	rcu_read_unlock();
> @@ -642,6 +644,8 @@ static void collect_procs_file(struct folio *folio, struct page *page,
>  			continue;
>  		vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
>  				      pgoff) {
> +			unsigned long addr;
> +
>  			/*
>  			 * Send early kill signal to tasks where a vma covers
>  			 * the page but the corrupted page is not necessarily
> @@ -649,8 +653,10 @@ static void collect_procs_file(struct folio *folio, struct page *page,
>  			 * Assume applications who requested early kill want
>  			 * to be informed of all such data corruptions.
>  			 */
> -			if (vma->vm_mm == t->mm)
> -				add_to_kill_anon_file(t, page, vma, to_kill);
> +			if (vma->vm_mm != t->mm)
> +				continue;
> +			addr = page_address_in_vma(page, vma);
> +			add_to_kill_anon_file(t, page, vma, to_kill, addr);
>  		}
>  	}
>  	rcu_read_unlock();
> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> index 74d2de15fb5e..e9e208b4ac4b 100644
> --- a/mm/page_vma_mapped.c
> +++ b/mm/page_vma_mapped.c
> @@ -319,11 +319,11 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
>   * @page: the page to test
>   * @vma: the VMA to test
>   *
> - * Returns 1 if the page is mapped into the page tables of the VMA, 0
> - * if the page is not mapped into the page tables of this VMA.  Only
> - * valid for normal file or anonymous VMAs.
> + * Return: If the page is mapped into the page tables of the VMA, the
> + * address that the page is mapped at.  -EFAULT if the page is not mapped.
> + * Only valid for normal file or anonymous VMAs.
>   */
> -int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
> +unsigned long page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
>  {
>  	struct page_vma_mapped_walk pvmw = {
>  		.pfn = page_to_pfn(page),
> @@ -334,9 +334,9 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
>  
>  	pvmw.address = vma_address(page, vma);
>  	if (pvmw.address == -EFAULT)
> -		return 0;
> +		return -EFAULT;
>  	if (!page_vma_mapped_walk(&pvmw))
> -		return 0;
> +		return -EFAULT;
>  	page_vma_mapped_walk_done(&pvmw);
> -	return 1;
> +	return pvmw.address;
>  }
>
Matthew Wilcox March 5, 2024, 8:09 p.m. UTC | #2
On Mon, Mar 04, 2024 at 08:31:56PM +0800, Miaohe Lin wrote:
> On 2024/3/1 5:20, Matthew Wilcox (Oracle) wrote:
> > The only user of this function calls page_address_in_vma() immediately
> > after page_mapped_in_vma() calculates it and uses it to return true/false.
> > Return the address instead, allowing memory-failure to skip the call
> > to page_address_in_vma().
> 
> IIUC, this patch will change the semantics slightly. There is one corner
> case where page_mapped_in_vma() returns true but page_address_in_vma()
> returns -EFAULT if mremap is done between the two calls. In that case,
> SIGKILL will be sent to the user. But with this patch applied, SIGBUS
> will be sent to the user with the address from before the mremap. Or am I
> missing something?

Isn't that an example of a race that userspace can't possibly rely on?
It can't observe where the kernel has got to in its processing of the
fault, so it's OK if we behave as if the mremap() has happened before,
during or after the two calls.
Miaohe Lin March 6, 2024, 8:10 a.m. UTC | #3
On 2024/3/6 4:09, Matthew Wilcox wrote:
> On Mon, Mar 04, 2024 at 08:31:56PM +0800, Miaohe Lin wrote:
>> On 2024/3/1 5:20, Matthew Wilcox (Oracle) wrote:
>>> The only user of this function calls page_address_in_vma() immediately
>>> after page_mapped_in_vma() calculates it and uses it to return true/false.
>>> Return the address instead, allowing memory-failure to skip the call
>>> to page_address_in_vma().
>>
>> IIUC, this patch will change the semantics slightly. There is one corner
>> case where page_mapped_in_vma() returns true but page_address_in_vma()
>> returns -EFAULT if mremap is done between the two calls. In that case,
>> SIGKILL will be sent to the user. But with this patch applied, SIGBUS
>> will be sent to the user with the address from before the mremap. Or am I
>> missing something?
> 
> Isn't that an example of a race that userspace can't possibly rely on?

You're right. Userspace shouldn't possibly rely on it.
Thanks.

> It can't observe where the kernel has got to in its processing of the
> fault, so it's OK if we behave as if the mremap() has happened before,
> during or after the two calls.
> .
>
Miaohe Lin March 6, 2024, 8:17 a.m. UTC | #4
On 2024/3/1 5:20, Matthew Wilcox (Oracle) wrote:
> The only user of this function calls page_address_in_vma() immediately
> after page_mapped_in_vma() calculates it and uses it to return true/false.
> Return the address instead, allowing memory-failure to skip the call
> to page_address_in_vma().
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  include/linux/rmap.h |  2 +-
>  mm/memory-failure.c  | 22 ++++++++++++++--------
>  mm/page_vma_mapped.c | 14 +++++++-------
>  3 files changed, 22 insertions(+), 16 deletions(-)
> 
> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> index b7944a833668..ba027a4d9abf 100644
> --- a/include/linux/rmap.h
> +++ b/include/linux/rmap.h
> @@ -698,7 +698,7 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
>  
>  void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked);
>  
> -int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
> +unsigned long page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
>  
>  /*
>   * rmap_walk_control: To control rmap traversing for specific needs
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 7f8473c08ae3..40a8964954e5 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -462,10 +462,11 @@ static void __add_to_kill(struct task_struct *tsk, struct page *p,
>  }
>  
>  static void add_to_kill_anon_file(struct task_struct *tsk, struct page *p,
> -				  struct vm_area_struct *vma,
> -				  struct list_head *to_kill)
> +		struct vm_area_struct *vma, struct list_head *to_kill,
> +		unsigned long addr)
>  {
> -	unsigned long addr = page_address_in_vma(p, vma);
> +	if (addr == -EFAULT)
> +		return;
>  	__add_to_kill(tsk, p, vma, to_kill, addr);
>  }
>  
> @@ -609,12 +610,13 @@ static void collect_procs_anon(struct folio *folio, struct page *page,
>  			continue;
>  		anon_vma_interval_tree_foreach(vmac, &av->rb_root,
>  					       pgoff, pgoff) {
> +			unsigned long addr;
> +
>  			vma = vmac->vma;
>  			if (vma->vm_mm != t->mm)
>  				continue;
> -			if (!page_mapped_in_vma(page, vma))
> -				continue;
> -			add_to_kill_anon_file(t, page, vma, to_kill);
> +			addr = page_mapped_in_vma(page, vma);
> +			add_to_kill_anon_file(t, page, vma, to_kill, addr);
>  		}
>  	}
>  	rcu_read_unlock();
> @@ -642,6 +644,8 @@ static void collect_procs_file(struct folio *folio, struct page *page,
>  			continue;
>  		vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
>  				      pgoff) {
> +			unsigned long addr;
> +
>  			/*
>  			 * Send early kill signal to tasks where a vma covers
>  			 * the page but the corrupted page is not necessarily
> @@ -649,8 +653,10 @@ static void collect_procs_file(struct folio *folio, struct page *page,
>  			 * Assume applications who requested early kill want
>  			 * to be informed of all such data corruptions.
>  			 */
> -			if (vma->vm_mm == t->mm)
> -				add_to_kill_anon_file(t, page, vma, to_kill);
> +			if (vma->vm_mm != t->mm)
> +				continue;
> +			addr = page_address_in_vma(page, vma);
> +			add_to_kill_anon_file(t, page, vma, to_kill, addr);
>  		}
>  	}
>  	rcu_read_unlock();
> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> index 74d2de15fb5e..e9e208b4ac4b 100644
> --- a/mm/page_vma_mapped.c
> +++ b/mm/page_vma_mapped.c
> @@ -319,11 +319,11 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
>   * @page: the page to test
>   * @vma: the VMA to test
>   *
> - * Returns 1 if the page is mapped into the page tables of the VMA, 0
> - * if the page is not mapped into the page tables of this VMA.  Only
> - * valid for normal file or anonymous VMAs.
> + * Return: If the page is mapped into the page tables of the VMA, the
> + * address that the page is mapped at.  -EFAULT if the page is not mapped.
> + * Only valid for normal file or anonymous VMAs.
>   */
> -int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
> +unsigned long page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
>  {
>  	struct page_vma_mapped_walk pvmw = {
>  		.pfn = page_to_pfn(page),
> @@ -334,9 +334,9 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
>  
>  	pvmw.address = vma_address(page, vma);
>  	if (pvmw.address == -EFAULT)
> -		return 0;
> +		return -EFAULT;
>  	if (!page_vma_mapped_walk(&pvmw))
> -		return 0;
> +		return -EFAULT;
>  	page_vma_mapped_walk_done(&pvmw);
> -	return 1;
> +	return pvmw.address;

page_mapped_in_vma() is only called by collect_procs_anon(). Would it be better to place it under CONFIG_MEMORY_FAILURE?

Anyway, this patch looks good to me. Thanks.

Acked-by: Miaohe Lin <linmiaohe@huawei.com>
diff mbox series

Patch

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b7944a833668..ba027a4d9abf 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -698,7 +698,7 @@  int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
 
 void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked);
 
-int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
+unsigned long page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
 /*
  * rmap_walk_control: To control rmap traversing for specific needs
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 7f8473c08ae3..40a8964954e5 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -462,10 +462,11 @@  static void __add_to_kill(struct task_struct *tsk, struct page *p,
 }
 
 static void add_to_kill_anon_file(struct task_struct *tsk, struct page *p,
-				  struct vm_area_struct *vma,
-				  struct list_head *to_kill)
+		struct vm_area_struct *vma, struct list_head *to_kill,
+		unsigned long addr)
 {
-	unsigned long addr = page_address_in_vma(p, vma);
+	if (addr == -EFAULT)
+		return;
 	__add_to_kill(tsk, p, vma, to_kill, addr);
 }
 
@@ -609,12 +610,13 @@  static void collect_procs_anon(struct folio *folio, struct page *page,
 			continue;
 		anon_vma_interval_tree_foreach(vmac, &av->rb_root,
 					       pgoff, pgoff) {
+			unsigned long addr;
+
 			vma = vmac->vma;
 			if (vma->vm_mm != t->mm)
 				continue;
-			if (!page_mapped_in_vma(page, vma))
-				continue;
-			add_to_kill_anon_file(t, page, vma, to_kill);
+			addr = page_mapped_in_vma(page, vma);
+			add_to_kill_anon_file(t, page, vma, to_kill, addr);
 		}
 	}
 	rcu_read_unlock();
@@ -642,6 +644,8 @@  static void collect_procs_file(struct folio *folio, struct page *page,
 			continue;
 		vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
 				      pgoff) {
+			unsigned long addr;
+
 			/*
 			 * Send early kill signal to tasks where a vma covers
 			 * the page but the corrupted page is not necessarily
@@ -649,8 +653,10 @@  static void collect_procs_file(struct folio *folio, struct page *page,
 			 * Assume applications who requested early kill want
 			 * to be informed of all such data corruptions.
 			 */
-			if (vma->vm_mm == t->mm)
-				add_to_kill_anon_file(t, page, vma, to_kill);
+			if (vma->vm_mm != t->mm)
+				continue;
+			addr = page_address_in_vma(page, vma);
+			add_to_kill_anon_file(t, page, vma, to_kill, addr);
 		}
 	}
 	rcu_read_unlock();
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 74d2de15fb5e..e9e208b4ac4b 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -319,11 +319,11 @@  bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
  * @page: the page to test
  * @vma: the VMA to test
  *
- * Returns 1 if the page is mapped into the page tables of the VMA, 0
- * if the page is not mapped into the page tables of this VMA.  Only
- * valid for normal file or anonymous VMAs.
+ * Return: If the page is mapped into the page tables of the VMA, the
+ * address that the page is mapped at.  -EFAULT if the page is not mapped.
+ * Only valid for normal file or anonymous VMAs.
  */
-int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+unsigned long page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
 {
 	struct page_vma_mapped_walk pvmw = {
 		.pfn = page_to_pfn(page),
@@ -334,9 +334,9 @@  int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
 
 	pvmw.address = vma_address(page, vma);
 	if (pvmw.address == -EFAULT)
-		return 0;
+		return -EFAULT;
 	if (!page_vma_mapped_walk(&pvmw))
-		return 0;
+		return -EFAULT;
 	page_vma_mapped_walk_done(&pvmw);
-	return 1;
+	return pvmw.address;
 }