diff mbox series

mm: Fix __wp_page_copy_user fallback path for remote mm

Message ID 20241101-mm-remote-pfn-v1-1-080b609270b7@asahilina.net (mailing list archive)
State New
Headers show
Series mm: Fix __wp_page_copy_user fallback path for remote mm | expand

Commit Message

Asahi Lina Nov. 1, 2024, 12:08 p.m. UTC
If the source page is a PFN mapping, we copy back from userspace.
However, if this fault is a remote access, we cannot use
__copy_from_user_inatomic. Instead, use access_remote_vm() in this case.

Fixes WARN and incorrect zero-filling when writing to CoW mappings in
a remote process, such as when using gdb on a binary present on a DAX
filesystem.

[  143.683782] ------------[ cut here ]------------
[  143.683784] WARNING: CPU: 1 PID: 350 at mm/memory.c:2904 __wp_page_copy_user+0x120/0x2bc
[  143.683793] CPU: 1 PID: 350 Comm: gdb Not tainted 6.6.52 #1
[  143.683794] Hardware name: linux,dummy-virt (DT)
[  143.683795] pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
[  143.683796] pc : __wp_page_copy_user+0x120/0x2bc
[  143.683798] lr : __wp_page_copy_user+0x254/0x2bc
[  143.683799] sp : ffff80008272b8b0
[  143.683799] x29: ffff80008272b8b0 x28: 0000000000000000 x27: ffff000083bad580
[  143.683801] x26: 0000000000000000 x25: 0000fffff7fd5000 x24: ffff000081db04c0
[  143.683802] x23: ffff00014f24b000 x22: fffffc00053c92c0 x21: ffff000083502150
[  143.683803] x20: 0000fffff7fd5000 x19: ffff80008272b9d0 x18: 0000000000000000
[  143.683804] x17: ffff000081db0500 x16: ffff800080fe52a0 x15: 0000fffff7fd5000
[  143.683804] x14: 0000000000bb1845 x13: 0000000000000080 x12: ffff80008272b880
[  143.683805] x11: ffff000081d13600 x10: ffff000081d13608 x9 : ffff000081d1360c
[  143.683806] x8 : ffff000083a16f00 x7 : 0000000000000010 x6 : ffff00014f24b000
[  143.683807] x5 : ffff00014f24c000 x4 : 0000000000000000 x3 : ffff000083582000
[  143.683807] x2 : 0000000000000f80 x1 : 0000fffff7fd5000 x0 : 0000000000001000
[  143.683808] Call trace:
[  143.683809]  __wp_page_copy_user+0x120/0x2bc
[  143.683810]  wp_page_copy+0x98/0x5c0
[  143.683813]  do_wp_page+0x250/0x530
[  143.683814]  __handle_mm_fault+0x278/0x284
[  143.683817]  handle_mm_fault+0x64/0x1e8
[  143.683819]  faultin_page+0x5c/0x110
[  143.683820]  __get_user_pages+0xc8/0x2f4
[  143.683821]  get_user_pages_remote+0xac/0x30c
[  143.683823]  __access_remote_vm+0xb4/0x368
[  143.683824]  access_remote_vm+0x10/0x1c
[  143.683826]  mem_rw.isra.0+0xc4/0x218
[  143.683831]  mem_write+0x18/0x24
[  143.683831]  vfs_write+0xa0/0x37c
[  143.683834]  ksys_pwrite64+0x7c/0xc0
[  143.683834]  __arm64_sys_pwrite64+0x20/0x2c
[  143.683835]  invoke_syscall+0x48/0x10c
[  143.683837]  el0_svc_common.constprop.0+0x40/0xe0
[  143.683839]  do_el0_svc+0x1c/0x28
[  143.683841]  el0_svc+0x3c/0xdc
[  143.683846]  el0t_64_sync_handler+0x120/0x12c
[  143.683848]  el0t_64_sync+0x194/0x198
[  143.683849] ---[ end trace 0000000000000000 ]---

Signed-off-by: Asahi Lina <lina@asahilina.net>
---
 mm/memory.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)


---
base-commit: 81983758430957d9a5cb3333fe324fd70cf63e7e
change-id: 20241101-mm-remote-pfn-d252669b9475

Cheers,
~~ Lina

Comments

Andrew Morton Nov. 1, 2024, 7:07 p.m. UTC | #1
On Fri, 01 Nov 2024 21:08:02 +0900 Asahi Lina <lina@asahilina.net> wrote:

> If the source page is a PFN mapping, we copy back from userspace.
> However, if this fault is a remote access, we cannot use
> __copy_from_user_inatomic. Instead, use access_remote_vm() in this case.
> 
> Fixes WARN and incorrect zero-filling when writing to CoW mappings in
> a remote process, such as when using gdb on a binary present on a DAX
> filesystem.
>
> [  143.683782] ------------[ cut here ]------------
> [  143.683784] WARNING: CPU: 1 PID: 350 at mm/memory.c:2904 __wp_page_copy_user+0x120/0x2bc
>
> ...
>

Thanks.  I assume we should backport this into earlier kernels?

If so, a Fixes: target is desired, to tell people how far back in time
it should be ported.  I think it's

83d116c53058 ("mm: fix double page fault on arm64 if PTE_AF is cleared").


> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3081,13 +3081,18 @@ static inline int __wp_page_copy_user(struct page *dst, struct page *src,
>  			update_mmu_cache_range(vmf, vma, addr, vmf->pte, 1);
>  	}
>  
> +	/* If the mm is a remote mm, copy in the page using access_remote_vm() */
> +	if (current->mm != mm) {
> +		if (access_remote_vm(mm, (unsigned long)uaddr, kaddr, PAGE_SIZE, 0) != PAGE_SIZE)
> +			goto warn;
> +	}
>  	/*
>  	 * This really shouldn't fail, because the page is there
>  	 * in the page tables. But it might just be unreadable,
>  	 * in which case we just give up and fill the result with
>  	 * zeroes.
>  	 */
> -	if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
> +	else if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
>  		if (vmf->pte)
>  			goto warn;
>  

The coding style ends up being unconventional.  I made these changes:

--- a/mm/memory.c~mm-fix-__wp_page_copy_user-fallback-path-for-remote-mm-fix
+++ a/mm/memory.c
@@ -3081,18 +3081,20 @@ static inline int __wp_page_copy_user(st
 			update_mmu_cache_range(vmf, vma, addr, vmf->pte, 1);
 	}
 
-	/* If the mm is a remote mm, copy in the page using access_remote_vm() */
-	if (current->mm != mm) {
-		if (access_remote_vm(mm, (unsigned long)uaddr, kaddr, PAGE_SIZE, 0) != PAGE_SIZE)
-			goto warn;
-	}
 	/*
-	 * This really shouldn't fail, because the page is there
-	 * in the page tables. But it might just be unreadable,
-	 * in which case we just give up and fill the result with
-	 * zeroes.
+	 * If the mm is a remote mm, copy in the page using access_remote_vm()
 	 */
-	else if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
+	if (current->mm != mm) {
+		if (access_remote_vm(mm, (unsigned long)uaddr, kaddr,
+				     PAGE_SIZE, 0) != PAGE_SIZE)
+			goto warn;
+	} else if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
+		/*
+		 * This really shouldn't fail, because the page is there
+		 * in the page tables. But it might just be unreadable,
+		 * in which case we just give up and fill the result with
+		 * zeroes.
+		 */
 		if (vmf->pte)
 			goto warn;
Asahi Lina Nov. 1, 2024, 9:18 p.m. UTC | #2
On 11/2/24 4:07 AM, Andrew Morton wrote:
> On Fri, 01 Nov 2024 21:08:02 +0900 Asahi Lina <lina@asahilina.net> wrote:
> 
>> If the source page is a PFN mapping, we copy back from userspace.
>> However, if this fault is a remote access, we cannot use
>> __copy_from_user_inatomic. Instead, use access_remote_vm() in this case.
>>
>> Fixes WARN and incorrect zero-filling when writing to CoW mappings in
>> a remote process, such as when using gdb on a binary present on a DAX
>> filesystem.
>>
>> [  143.683782] ------------[ cut here ]------------
>> [  143.683784] WARNING: CPU: 1 PID: 350 at mm/memory.c:2904 __wp_page_copy_user+0x120/0x2bc
>>
>> ...
>>
> 
> Thanks.  I assume we should backport this into earlier kernels?
> 
> If so, a Fixes: target is desired, to tell people how far back in time
> it should be ported.

I think so? I'm not sure how back the bug goes though, possibly a long
time...

> I think it's
> 
> 83d116c53058 ("mm: fix double page fault on arm64 if PTE_AF is cleared").

That doesn't sound right. The old code prior to the patch still had the
__copy_from_user_inatomic() fallback path so it should still have the
same problem. That fallback goes back to:

  6aab341e0a28 ("mm: re-architect the VM_UNPAGED logic")

But the ptrace code back then doesn't seem to be using that codepath at
all, so that's meaningless. I think this is the proper tag:

  3565fce3a659 ("mm, x86: get_user_pages() for dax mappings")

That's when GUP started working for DAX mappings at all, and if my
reading of the code is correct, at that point do_wp_page() was only
grabbing the struct page for normal pages to pass to wp_page_copy()
(triggering the fallback path for DAX mappings). The code has moved
around a lot today but has the same logic, so I think it's been broken
since then.

Should I resend it with the Fixes tag?

> 
> 
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -3081,13 +3081,18 @@ static inline int __wp_page_copy_user(struct page *dst, struct page *src,
>>  			update_mmu_cache_range(vmf, vma, addr, vmf->pte, 1);
>>  	}
>>  
>> +	/* If the mm is a remote mm, copy in the page using access_remote_vm() */
>> +	if (current->mm != mm) {
>> +		if (access_remote_vm(mm, (unsigned long)uaddr, kaddr, PAGE_SIZE, 0) != PAGE_SIZE)
>> +			goto warn;
>> +	}
>>  	/*
>>  	 * This really shouldn't fail, because the page is there
>>  	 * in the page tables. But it might just be unreadable,
>>  	 * in which case we just give up and fill the result with
>>  	 * zeroes.
>>  	 */
>> -	if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
>> +	else if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
>>  		if (vmf->pte)
>>  			goto warn;
>>  
> 
> The coding style ends up being unconventional.  I made these changes:
> 
> --- a/mm/memory.c~mm-fix-__wp_page_copy_user-fallback-path-for-remote-mm-fix
> +++ a/mm/memory.c
> @@ -3081,18 +3081,20 @@ static inline int __wp_page_copy_user(st
>  			update_mmu_cache_range(vmf, vma, addr, vmf->pte, 1);
>  	}
>  
> -	/* If the mm is a remote mm, copy in the page using access_remote_vm() */
> -	if (current->mm != mm) {
> -		if (access_remote_vm(mm, (unsigned long)uaddr, kaddr, PAGE_SIZE, 0) != PAGE_SIZE)
> -			goto warn;
> -	}
>  	/*
> -	 * This really shouldn't fail, because the page is there
> -	 * in the page tables. But it might just be unreadable,
> -	 * in which case we just give up and fill the result with
> -	 * zeroes.
> +	 * If the mm is a remote mm, copy in the page using access_remote_vm()
>  	 */
> -	else if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
> +	if (current->mm != mm) {
> +		if (access_remote_vm(mm, (unsigned long)uaddr, kaddr,
> +				     PAGE_SIZE, 0) != PAGE_SIZE)
> +			goto warn;
> +	} else if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
> +		/*
> +		 * This really shouldn't fail, because the page is there
> +		 * in the page tables. But it might just be unreadable,
> +		 * in which case we just give up and fill the result with
> +		 * zeroes.
> +		 */
>  		if (vmf->pte)
>  			goto warn;
>  
> _
> 
> I'll queue this for testing and shall await further review.
> 

~~ Lina
diff mbox series

Patch

diff --git a/mm/memory.c b/mm/memory.c
index 3ccee51adfbbd007b24331fe6874265f231a877b..dba25d9734063ac02cdaeb0a5cd5432473f6372e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3081,13 +3081,18 @@  static inline int __wp_page_copy_user(struct page *dst, struct page *src,
 			update_mmu_cache_range(vmf, vma, addr, vmf->pte, 1);
 	}
 
+	/* If the mm is a remote mm, copy in the page using access_remote_vm() */
+	if (current->mm != mm) {
+		if (access_remote_vm(mm, (unsigned long)uaddr, kaddr, PAGE_SIZE, 0) != PAGE_SIZE)
+			goto warn;
+	}
 	/*
 	 * This really shouldn't fail, because the page is there
 	 * in the page tables. But it might just be unreadable,
 	 * in which case we just give up and fill the result with
 	 * zeroes.
 	 */
-	if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
+	else if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
 		if (vmf->pte)
 			goto warn;