diff mbox series

mm/mremap: Fix address wraparound in move_page_tables()

Message ID 20241111-fix-mremap-32bit-wrap-v1-1-61d6be73b722@google.com (mailing list archive)
State New
Headers show
Series mm/mremap: Fix address wraparound in move_page_tables() | expand

Commit Message

Jann Horn Nov. 11, 2024, 7:34 p.m. UTC
On 32-bit platforms, it is possible for the expression
`len + old_addr < old_end` to be false-positive if `len + old_addr` wraps
around. `old_addr` is the cursor in the old range up to which page table
entries have been moved; so if the operation succeeded, `old_addr` is the
*end* of the old region, and adding `len` to it can wrap.

The overflow causes mremap() to mistakenly believe that PTEs have been
copied; the consequence is that mremap() bails out, but doesn't move the
PTEs back before the new VMA is unmapped, causing anonymous pages in the
region to be lost. So basically if userspace tries to mremap() a
private-anon region and hits this bug, mremap() will return an error and
the private-anon region's contents appear to have been zeroed.

The idea of this check is that `old_end - len` is the original start
address, and writing the check that way also makes it easier to read; so
fix the check by rearranging the comparison accordingly.

(An alternate fix would be to refactor this function by introducing an
"orig_old_start" variable or such.)

Cc: stable@vger.kernel.org
Fixes: af8ca1c14906 ("mm/mremap: optimize the start addresses in move_page_tables()")
Signed-off-by: Jann Horn <jannh@google.com>
---
Tested in a VM with a 32-bit X86 kernel; without the patch:

```
user@horn:~/big_mremap$ cat test.c
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <err.h>
#include <sys/mman.h>

#define ADDR1 ((void*)0x60000000)
#define ADDR2 ((void*)0x10000000)
#define SIZE          0x50000000uL

int main(void) {
  unsigned char *p1 = mmap(ADDR1, SIZE, PROT_READ|PROT_WRITE,
      MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED_NOREPLACE, -1, 0);
  if (p1 == MAP_FAILED)
    err(1, "mmap 1");
  unsigned char *p2 = mmap(ADDR2, SIZE, PROT_NONE,
      MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED_NOREPLACE, -1, 0);
  if (p2 == MAP_FAILED)
    err(1, "mmap 2");
  *p1 = 0x41;
  printf("first char is 0x%02hhx\n", *p1);
  unsigned char *p3 = mremap(p1, SIZE, SIZE,
      MREMAP_MAYMOVE|MREMAP_FIXED, p2);
  if (p3 == MAP_FAILED) {
    printf("mremap() failed; first char is 0x%02hhx\n", *p1);
  } else {
    printf("mremap() succeeded; first char is 0x%02hhx\n", *p3);
  }
}
user@horn:~/big_mremap$ gcc -static -o test test.c
user@horn:~/big_mremap$ setarch -R ./test
first char is 0x41
mremap() failed; first char is 0x00
```

With the patch:

```
user@horn:~/big_mremap$ setarch -R ./test
first char is 0x41
mremap() succeeded; first char is 0x41
```
---
 mm/mremap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


---
base-commit: 2d5404caa8c7bb5c4e0435f94b28834ae5456623
change-id: 20241111-fix-mremap-32bit-wrap-747105730f20

Comments

Liam R. Howlett Nov. 12, 2024, 1:51 a.m. UTC | #1
* Jann Horn <jannh@google.com> [241111 14:35]:
> On 32-bit platforms, it is possible for the expression
> `len + old_addr < old_end` to be false-positive if `len + old_addr` wraps
> around. `old_addr` is the cursor in the old range up to which page table
> entries have been moved; so if the operation succeeded, `old_addr` is the
> *end* of the old region, and adding `len` to it can wrap.
> 
> The overflow causes mremap() to mistakenly believe that PTEs have been
> copied; the consequence is that mremap() bails out, but doesn't move the
> PTEs back before the new VMA is unmapped, causing anonymous pages in the
> region to be lost. So basically if userspace tries to mremap() a
> private-anon region and hits this bug, mremap() will return an error and
> the private-anon region's contents appear to have been zeroed.
> 
> The idea of this check is that `old_end - len` is the original start
> address, and writing the check that way also makes it easier to read; so
> fix the check by rearranging the comparison accordingly.
> 
> (An alternate fix would be to refactor this function by introducing an
> "orig_old_start" variable or such.)
> 
> Cc: stable@vger.kernel.org
> Fixes: af8ca1c14906 ("mm/mremap: optimize the start addresses in move_page_tables()")
> Signed-off-by: Jann Horn <jannh@google.com>


Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>

> ---
> Tested in a VM with a 32-bit X86 kernel; without the patch:
> 
> ```
> user@horn:~/big_mremap$ cat test.c
> #define _GNU_SOURCE
> #include <stdlib.h>
> #include <stdio.h>
> #include <err.h>
> #include <sys/mman.h>
> 
> #define ADDR1 ((void*)0x60000000)
> #define ADDR2 ((void*)0x10000000)
> #define SIZE          0x50000000uL
> 
> int main(void) {
>   unsigned char *p1 = mmap(ADDR1, SIZE, PROT_READ|PROT_WRITE,
>       MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED_NOREPLACE, -1, 0);
>   if (p1 == MAP_FAILED)
>     err(1, "mmap 1");
>   unsigned char *p2 = mmap(ADDR2, SIZE, PROT_NONE,
>       MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED_NOREPLACE, -1, 0);
>   if (p2 == MAP_FAILED)
>     err(1, "mmap 2");
>   *p1 = 0x41;
>   printf("first char is 0x%02hhx\n", *p1);
>   unsigned char *p3 = mremap(p1, SIZE, SIZE,
>       MREMAP_MAYMOVE|MREMAP_FIXED, p2);
>   if (p3 == MAP_FAILED) {
>     printf("mremap() failed; first char is 0x%02hhx\n", *p1);
>   } else {
>     printf("mremap() succeeded; first char is 0x%02hhx\n", *p3);
>   }
> }
> user@horn:~/big_mremap$ gcc -static -o test test.c
> user@horn:~/big_mremap$ setarch -R ./test
> first char is 0x41
> mremap() failed; first char is 0x00
> ```
> 
> With the patch:
> 
> ```
> user@horn:~/big_mremap$ setarch -R ./test
> first char is 0x41
> mremap() succeeded; first char is 0x41
> ```
> ---
>  mm/mremap.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/mm/mremap.c b/mm/mremap.c
> index dda09e957a5d4c2546934b796e862e5e0213b311..dee98ff2bbd64439200dddac16c4bd054537c2ed 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -648,7 +648,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>  	 * Prevent negative return values when {old,new}_addr was realigned
>  	 * but we broke out of the above loop for the first PMD itself.
>  	 */
> -	if (len + old_addr < old_end)
> +	if (old_addr < old_end - len)
>  		return 0;
>  
>  	return len + old_addr - old_end;	/* how much done */
> 
> ---
> base-commit: 2d5404caa8c7bb5c4e0435f94b28834ae5456623
> change-id: 20241111-fix-mremap-32bit-wrap-747105730f20
> 
> -- 
> Jann Horn <jannh@google.com>
>
Qi Zheng Nov. 12, 2024, 3 a.m. UTC | #2
On 2024/11/12 03:34, Jann Horn wrote:
> On 32-bit platforms, it is possible for the expression
> `len + old_addr < old_end` to be false-positive if `len + old_addr` wraps
> around. `old_addr` is the cursor in the old range up to which page table
> entries have been moved; so if the operation succeeded, `old_addr` is the
> *end* of the old region, and adding `len` to it can wrap.
> 
> The overflow causes mremap() to mistakenly believe that PTEs have been
> copied; the consequence is that mremap() bails out, but doesn't move the
> PTEs back before the new VMA is unmapped, causing anonymous pages in the
> region to be lost. So basically if userspace tries to mremap() a
> private-anon region and hits this bug, mremap() will return an error and
> the private-anon region's contents appear to have been zeroed.
> 
> The idea of this check is that `old_end - len` is the original start
> address, and writing the check that way also makes it easier to read; so
> fix the check by rearranging the comparison accordingly.
> 
> (An alternate fix would be to refactor this function by introducing an
> "orig_old_start" variable or such.)
> 
> Cc: stable@vger.kernel.org
> Fixes: af8ca1c14906 ("mm/mremap: optimize the start addresses in move_page_tables()")
> Signed-off-by: Jann Horn <jannh@google.com>

Acked-by: Qi Zheng <zhengqi.arch@bytedance.com>

Thanks!
Lorenzo Stoakes Nov. 12, 2024, 9:36 a.m. UTC | #3
On Mon, Nov 11, 2024 at 08:34:30PM +0100, Jann Horn wrote:
> On 32-bit platforms, it is possible for the expression
> `len + old_addr < old_end` to be false-positive if `len + old_addr` wraps
> around. `old_addr` is the cursor in the old range up to which page table
> entries have been moved; so if the operation succeeded, `old_addr` is the
> *end* of the old region, and adding `len` to it can wrap.
>
> The overflow causes mremap() to mistakenly believe that PTEs have been
> copied; the consequence is that mremap() bails out, but doesn't move the
> PTEs back before the new VMA is unmapped, causing anonymous pages in the
> region to be lost. So basically if userspace tries to mremap() a
> private-anon region and hits this bug, mremap() will return an error and
> the private-anon region's contents appear to have been zeroed.
>
> The idea of this check is that `old_end - len` is the original start
> address, and writing the check that way also makes it easier to read; so
> fix the check by rearranging the comparison accordingly.
>
> (An alternate fix would be to refactor this function by introducing an
> "orig_old_start" variable or such.)
>
> Cc: stable@vger.kernel.org
> Fixes: af8ca1c14906 ("mm/mremap: optimize the start addresses in move_page_tables()")
> Signed-off-by: Jann Horn <jannh@google.com>

Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

I would prefer the orig_old_start approach (maybe a different name :P) but
for the purpose of backporting and getting this fixed this is fine.

I also thought about using linux/overflow.h which has some nice helpers for
this kind of thing, but decided it didn't make sense since we have no
reason to risk overflow here.

I kind of hate this whole thing... but that can be part of a bigger mremap
refactoring when we bring the implementation into mm/vma.c... ;)

Thanks!

> ---
> Tested in a VM with a 32-bit X86 kernel; without the patch:
>
> ```
> user@horn:~/big_mremap$ cat test.c
> #define _GNU_SOURCE
> #include <stdlib.h>
> #include <stdio.h>
> #include <err.h>
> #include <sys/mman.h>
>
> #define ADDR1 ((void*)0x60000000)
> #define ADDR2 ((void*)0x10000000)
> #define SIZE          0x50000000uL
>
> int main(void) {
>   unsigned char *p1 = mmap(ADDR1, SIZE, PROT_READ|PROT_WRITE,
>       MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED_NOREPLACE, -1, 0);
>   if (p1 == MAP_FAILED)
>     err(1, "mmap 1");
>   unsigned char *p2 = mmap(ADDR2, SIZE, PROT_NONE,
>       MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED_NOREPLACE, -1, 0);
>   if (p2 == MAP_FAILED)
>     err(1, "mmap 2");
>   *p1 = 0x41;
>   printf("first char is 0x%02hhx\n", *p1);
>   unsigned char *p3 = mremap(p1, SIZE, SIZE,
>       MREMAP_MAYMOVE|MREMAP_FIXED, p2);
>   if (p3 == MAP_FAILED) {
>     printf("mremap() failed; first char is 0x%02hhx\n", *p1);
>   } else {
>     printf("mremap() succeeded; first char is 0x%02hhx\n", *p3);
>   }
> }
> user@horn:~/big_mremap$ gcc -static -o test test.c
> user@horn:~/big_mremap$ setarch -R ./test
> first char is 0x41
> mremap() failed; first char is 0x00
> ```
>
> With the patch:
>
> ```
> user@horn:~/big_mremap$ setarch -R ./test
> first char is 0x41
> mremap() succeeded; first char is 0x41
> ```
> ---
>  mm/mremap.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/mm/mremap.c b/mm/mremap.c
> index dda09e957a5d4c2546934b796e862e5e0213b311..dee98ff2bbd64439200dddac16c4bd054537c2ed 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -648,7 +648,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>  	 * Prevent negative return values when {old,new}_addr was realigned
>  	 * but we broke out of the above loop for the first PMD itself.
>  	 */
> -	if (len + old_addr < old_end)
> +	if (old_addr < old_end - len)
>  		return 0;
>
>  	return len + old_addr - old_end;	/* how much done */
>
> ---
> base-commit: 2d5404caa8c7bb5c4e0435f94b28834ae5456623
> change-id: 20241111-fix-mremap-32bit-wrap-747105730f20
>
> --
> Jann Horn <jannh@google.com>
>
Vlastimil Babka Nov. 12, 2024, 10:40 a.m. UTC | #4
On 11/11/24 20:34, Jann Horn wrote:
> On 32-bit platforms, it is possible for the expression
> `len + old_addr < old_end` to be false-positive if `len + old_addr` wraps
> around. `old_addr` is the cursor in the old range up to which page table
> entries have been moved; so if the operation succeeded, `old_addr` is the
> *end* of the old region, and adding `len` to it can wrap.
> 
> The overflow causes mremap() to mistakenly believe that PTEs have been
> copied; the consequence is that mremap() bails out, but doesn't move the
> PTEs back before the new VMA is unmapped, causing anonymous pages in the
> region to be lost. So basically if userspace tries to mremap() a
> private-anon region and hits this bug, mremap() will return an error and
> the private-anon region's contents appear to have been zeroed.
> 
> The idea of this check is that `old_end - len` is the original start
> address, and writing the check that way also makes it easier to read; so
> fix the check by rearranging the comparison accordingly.
> 
> (An alternate fix would be to refactor this function by introducing an
> "orig_old_start" variable or such.)
> 
> Cc: stable@vger.kernel.org
> Fixes: af8ca1c14906 ("mm/mremap: optimize the start addresses in move_page_tables()")
> Signed-off-by: Jann Horn <jannh@google.com>

Acked-by: Vlastimil Babka <vbabka@suse.cz>

Thanks!

> ---
> Tested in a VM with a 32-bit X86 kernel; without the patch:
> 
> ```
> user@horn:~/big_mremap$ cat test.c
> #define _GNU_SOURCE
> #include <stdlib.h>
> #include <stdio.h>
> #include <err.h>
> #include <sys/mman.h>
> 
> #define ADDR1 ((void*)0x60000000)
> #define ADDR2 ((void*)0x10000000)
> #define SIZE          0x50000000uL
> 
> int main(void) {
>   unsigned char *p1 = mmap(ADDR1, SIZE, PROT_READ|PROT_WRITE,
>       MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED_NOREPLACE, -1, 0);
>   if (p1 == MAP_FAILED)
>     err(1, "mmap 1");
>   unsigned char *p2 = mmap(ADDR2, SIZE, PROT_NONE,
>       MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED_NOREPLACE, -1, 0);
>   if (p2 == MAP_FAILED)
>     err(1, "mmap 2");
>   *p1 = 0x41;
>   printf("first char is 0x%02hhx\n", *p1);
>   unsigned char *p3 = mremap(p1, SIZE, SIZE,
>       MREMAP_MAYMOVE|MREMAP_FIXED, p2);
>   if (p3 == MAP_FAILED) {
>     printf("mremap() failed; first char is 0x%02hhx\n", *p1);
>   } else {
>     printf("mremap() succeeded; first char is 0x%02hhx\n", *p3);
>   }
> }
> user@horn:~/big_mremap$ gcc -static -o test test.c
> user@horn:~/big_mremap$ setarch -R ./test
> first char is 0x41
> mremap() failed; first char is 0x00
> ```
> 
> With the patch:
> 
> ```
> user@horn:~/big_mremap$ setarch -R ./test
> first char is 0x41
> mremap() succeeded; first char is 0x41
> ```
> ---
>  mm/mremap.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/mm/mremap.c b/mm/mremap.c
> index dda09e957a5d4c2546934b796e862e5e0213b311..dee98ff2bbd64439200dddac16c4bd054537c2ed 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -648,7 +648,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
>  	 * Prevent negative return values when {old,new}_addr was realigned
>  	 * but we broke out of the above loop for the first PMD itself.
>  	 */
> -	if (len + old_addr < old_end)
> +	if (old_addr < old_end - len)
>  		return 0;
>  
>  	return len + old_addr - old_end;	/* how much done */
> 
> ---
> base-commit: 2d5404caa8c7bb5c4e0435f94b28834ae5456623
> change-id: 20241111-fix-mremap-32bit-wrap-747105730f20
>
diff mbox series

Patch

diff --git a/mm/mremap.c b/mm/mremap.c
index dda09e957a5d4c2546934b796e862e5e0213b311..dee98ff2bbd64439200dddac16c4bd054537c2ed 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -648,7 +648,7 @@  unsigned long move_page_tables(struct vm_area_struct *vma,
 	 * Prevent negative return values when {old,new}_addr was realigned
 	 * but we broke out of the above loop for the first PMD itself.
 	 */
-	if (len + old_addr < old_end)
+	if (old_addr < old_end - len)
 		return 0;
 
 	return len + old_addr - old_end;	/* how much done */