uprobes: use vm_special_mapping close() functionality

Message ID 20240903073629.2442754-1-svens@linux.ibm.com (mailing list archive)
State Handled Elsewhere
Series uprobes: use vm_special_mapping close() functionality

Commit Message

Sven Schnelle Sept. 3, 2024, 7:36 a.m. UTC
The following KASAN splat was shown:

[   44.505448] ==================================================================
[   44.505455] BUG: KASAN: slab-use-after-free in special_mapping_close+0x9c/0xc8
[   44.505471] Read of size 8 at addr 00000000868dac48 by task sh/1384
[   44.505479]
[   44.505486] CPU: 51 UID: 0 PID: 1384 Comm: sh Not tainted 6.11.0-rc6-next-20240902-dirty #1496
[   44.505503] Hardware name: IBM 3931 A01 704 (z/VM 7.3.0)
[   44.505508] Call Trace:
[   44.505511]  [<000b0324d2f78080>] dump_stack_lvl+0xd0/0x108
[   44.505521]  [<000b0324d2f5435c>] print_address_description.constprop.0+0x34/0x2e0
[   44.505529]  [<000b0324d2f5464c>] print_report+0x44/0x138
[   44.505536]  [<000b0324d1383192>] kasan_report+0xc2/0x140
[   44.505543]  [<000b0324d2f52904>] special_mapping_close+0x9c/0xc8
[   44.505550]  [<000b0324d12c7978>] remove_vma+0x78/0x120
[   44.505557]  [<000b0324d128a2c6>] exit_mmap+0x326/0x750
[   44.505563]  [<000b0324d0ba655a>] __mmput+0x9a/0x370
[   44.505570]  [<000b0324d0bbfbe0>] exit_mm+0x240/0x340
[   44.505575]  [<000b0324d0bc0228>] do_exit+0x548/0xd70
[   44.505580]  [<000b0324d0bc1102>] do_group_exit+0x132/0x390
[   44.505586]  [<000b0324d0bc13b6>] __s390x_sys_exit_group+0x56/0x60
[   44.505592]  [<000b0324d0adcbd6>] do_syscall+0x2f6/0x430
[   44.505599]  [<000b0324d2f78434>] __do_syscall+0xa4/0x170
[   44.505606]  [<000b0324d2f9454c>] system_call+0x74/0x98
[   44.505614]
[   44.505616] Allocated by task 1384:
[   44.505621]  kasan_save_stack+0x40/0x70
[   44.505630]  kasan_save_track+0x28/0x40
[   44.505636]  __kasan_kmalloc+0xa0/0xc0
[   44.505642]  __create_xol_area+0xfa/0x410
[   44.505648]  get_xol_area+0xb0/0xf0
[   44.505652]  uprobe_notify_resume+0x27a/0x470
[   44.505657]  irqentry_exit_to_user_mode+0x15e/0x1d0
[   44.505664]  pgm_check_handler+0x122/0x170
[   44.505670]
[   44.505672] Freed by task 1384:
[   44.505676]  kasan_save_stack+0x40/0x70
[   44.505682]  kasan_save_track+0x28/0x40
[   44.505687]  kasan_save_free_info+0x4a/0x70
[   44.505693]  __kasan_slab_free+0x5a/0x70
[   44.505698]  kfree+0xe8/0x3f0
[   44.505704]  __mmput+0x20/0x370
[   44.505709]  exit_mm+0x240/0x340
[   44.505713]  do_exit+0x548/0xd70
[   44.505718]  do_group_exit+0x132/0x390
[   44.505722]  __s390x_sys_exit_group+0x56/0x60
[   44.505727]  do_syscall+0x2f6/0x430
[   44.505732]  __do_syscall+0xa4/0x170
[   44.505738]  system_call+0x74/0x98

The problem is that uprobe_clear_state() kfree()s struct xol_area, which
contains the embedded struct vm_special_mapping xol_mapping. A pointer to
it is passed to _install_special_mapping() in xol_add_vma().
__mmput() reads:

static inline void __mmput(struct mm_struct *mm)
{
        VM_BUG_ON(atomic_read(&mm->mm_users));

        uprobe_clear_state(mm);
        exit_aio(mm);
        ksm_exit(mm);
        khugepaged_exit(mm); /* must run before exit_mmap */
        exit_mmap(mm);
        ...
}

So uprobe_clear_state() at the beginning frees the memory area
containing the vm_special_mapping data, but exit_mmap() later uses this
address via vma->vm_private_data (which was set in
_install_special_mapping()).
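
For reference, a simplified sketch of the objects involved (abridged field
lists, not the exact kernel definitions; see kernel/events/uprobes.c and
include/linux/mm_types.h for the real ones):

struct vm_area_struct;
struct page;

struct vm_special_mapping {
	const char *name;
	struct page **pages;
	/* ->fault and ->mremap omitted */
	void (*close)(const struct vm_special_mapping *sm,
		      struct vm_area_struct *vma);
};

struct xol_area {
	/* wait queue, slot bitmap, ... */
	struct vm_special_mapping xol_mapping;	/* embedded in the kmalloc'ed area */
	struct page *pages[2];
	unsigned long vaddr;
};

/*
 * xol_add_vma() passes &area->xol_mapping to _install_special_mapping(),
 * which stores that pointer in vma->vm_private_data.  Once
 * uprobe_clear_state() has kfree()d the xol_area, the later
 * exit_mmap() -> remove_vma() -> special_mapping_close() reads
 * vma->vm_private_data and dereferences the freed memory.
 */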

Fix this by making uprobe_clear_state() local to uprobes.c and using it
as the special mapping's close() callback.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
---
 include/linux/uprobes.h |  1 -
 kernel/events/uprobes.c | 39 ++++++++++++++++++++-------------------
 kernel/fork.c           |  1 -
 3 files changed, 20 insertions(+), 21 deletions(-)

Comments

Sven Schnelle Sept. 3, 2024, 7:49 a.m. UTC | #1
Hi Michael,

Sven Schnelle <svens@linux.ibm.com> writes:

> The following KASAN splat was shown:
>
> [   44.505448] ==================================================================                                                                      20:37:27 [3421/145075]
> [   44.505455] BUG: KASAN: slab-use-after-free in special_mapping_close+0x9c/0xc8
> [   44.505471] Read of size 8 at addr 00000000868dac48 by task sh/1384
> [   44.505479]
> [   44.505486] CPU: 51 UID: 0 PID: 1384 Comm: sh Not tainted 6.11.0-rc6-next-20240902-dirty #1496
> [   44.505503] Hardware name: IBM 3931 A01 704 (z/VM 7.3.0)
> [   44.505508] Call Trace:
> [   44.505511]  [<000b0324d2f78080>] dump_stack_lvl+0xd0/0x108
> [   44.505521]  [<000b0324d2f5435c>] print_address_description.constprop.0+0x34/0x2e0
> [   44.505529]  [<000b0324d2f5464c>] print_report+0x44/0x138
> [   44.505536]  [<000b0324d1383192>] kasan_report+0xc2/0x140
> [   44.505543]  [<000b0324d2f52904>] special_mapping_close+0x9c/0xc8
> [   44.505550]  [<000b0324d12c7978>] remove_vma+0x78/0x120
> [   44.505557]  [<000b0324d128a2c6>] exit_mmap+0x326/0x750
> [   44.505563]  [<000b0324d0ba655a>] __mmput+0x9a/0x370
> [   44.505570]  [<000b0324d0bbfbe0>] exit_mm+0x240/0x340
> [   44.505575]  [<000b0324d0bc0228>] do_exit+0x548/0xd70
> [   44.505580]  [<000b0324d0bc1102>] do_group_exit+0x132/0x390
> [   44.505586]  [<000b0324d0bc13b6>] __s390x_sys_exit_group+0x56/0x60
> [   44.505592]  [<000b0324d0adcbd6>] do_syscall+0x2f6/0x430
> [   44.505599]  [<000b0324d2f78434>] __do_syscall+0xa4/0x170
> [   44.505606]  [<000b0324d2f9454c>] system_call+0x74/0x98
> [   44.505614]
> [   44.505616] Allocated by task 1384:
> [   44.505621]  kasan_save_stack+0x40/0x70
> [   44.505630]  kasan_save_track+0x28/0x40
> [   44.505636]  __kasan_kmalloc+0xa0/0xc0
> [   44.505642]  __create_xol_area+0xfa/0x410
> [   44.505648]  get_xol_area+0xb0/0xf0
> [   44.505652]  uprobe_notify_resume+0x27a/0x470
> [   44.505657]  irqentry_exit_to_user_mode+0x15e/0x1d0
> [   44.505664]  pgm_check_handler+0x122/0x170
> [   44.505670]
> [   44.505672] Freed by task 1384:
> [   44.505676]  kasan_save_stack+0x40/0x70
> [   44.505682]  kasan_save_track+0x28/0x40
> [   44.505687]  kasan_save_free_info+0x4a/0x70
> [   44.505693]  __kasan_slab_free+0x5a/0x70
> [   44.505698]  kfree+0xe8/0x3f0
> [   44.505704]  __mmput+0x20/0x370
> [   44.505709]  exit_mm+0x240/0x340
> [   44.505713]  do_exit+0x548/0xd70
> [   44.505718]  do_group_exit+0x132/0x390
> [   44.505722]  __s390x_sys_exit_group+0x56/0x60
> [   44.505727]  do_syscall+0x2f6/0x430
> [   44.505732]  __do_syscall+0xa4/0x170
> [   44.505738]  system_call+0x74/0x98
> [..]

As this has a dependency on your special mapping close series, do you
want to carry that with your patches?
Oleg Nesterov Sept. 3, 2024, 9:08 a.m. UTC | #2
On 09/03, Sven Schnelle wrote:
>
> [   44.505448] ==================================================================                                                                      20:37:27 [3421/145075]
> [   44.505455] BUG: KASAN: slab-use-after-free in special_mapping_close+0x9c/0xc8
> [   44.505471] Read of size 8 at addr 00000000868dac48 by task sh/1384
> [   44.505479]
> [   44.505486] CPU: 51 UID: 0 PID: 1384 Comm: sh Not tainted 6.11.0-rc6-next-20240902-dirty #1496
> [   44.505503] Hardware name: IBM 3931 A01 704 (z/VM 7.3.0)
> [   44.505508] Call Trace:
> [   44.505511]  [<000b0324d2f78080>] dump_stack_lvl+0xd0/0x108
> [   44.505521]  [<000b0324d2f5435c>] print_address_description.constprop.0+0x34/0x2e0
> [   44.505529]  [<000b0324d2f5464c>] print_report+0x44/0x138
> [   44.505536]  [<000b0324d1383192>] kasan_report+0xc2/0x140
> [   44.505543]  [<000b0324d2f52904>] special_mapping_close+0x9c/0xc8
                                       ^^^^^^^^^^^^^^^^^^^^^
Caused by

	[PATCH v2 1/4] mm: Add optional close() to struct vm_special_mapping
	https://lore.kernel.org/all/20240812082605.743814-1-mpe@ellerman.id.au/

?

> +static void uprobe_clear_state(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
> +{
> +	struct xol_area *area = container_of(vma->vm_private_data, struct xol_area, xol_mapping);
> +
> +	mutex_lock(&delayed_uprobe_lock);
> +	delayed_uprobe_remove(NULL, vma->vm_mm);
> +	mutex_unlock(&delayed_uprobe_lock);
> +
> +	if (!area)
> +		return;
> +
> +	put_page(area->pages[0]);
> +	kfree(area->bitmap);
> +	kfree(area);
> +}
> +
>  static struct xol_area *__create_xol_area(unsigned long vaddr)
>  {
>  	struct mm_struct *mm = current->mm;
> @@ -1481,6 +1500,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
>
>  	area->xol_mapping.name = "[uprobes]";
>  	area->xol_mapping.fault = NULL;
> +	area->xol_mapping.close = uprobe_clear_state;

LGTM.

but with or without this fix __create_xol_area() also needs

	area->xol_mapping.mremap = NULL;

?

And in the longer term I think we should probably add a single instance
of "struct vm_special_mapping uprobe_special_mapping" with ->fault != NULL,
but this is another issue.

Oleg.
Sven Schnelle Sept. 3, 2024, 9:32 a.m. UTC | #3
Oleg Nesterov <oleg@redhat.com> writes:

> On 09/03, Sven Schnelle wrote:
>>
>> [   44.505448] ==================================================================                                                                      20:37:27 [3421/145075]
>> [   44.505455] BUG: KASAN: slab-use-after-free in special_mapping_close+0x9c/0xc8
>> [   44.505471] Read of size 8 at addr 00000000868dac48 by task sh/1384
>> [   44.505479]
>> [   44.505486] CPU: 51 UID: 0 PID: 1384 Comm: sh Not tainted 6.11.0-rc6-next-20240902-dirty #1496
>> [   44.505503] Hardware name: IBM 3931 A01 704 (z/VM 7.3.0)
>> [   44.505508] Call Trace:
>> [   44.505511]  [<000b0324d2f78080>] dump_stack_lvl+0xd0/0x108
>> [   44.505521]  [<000b0324d2f5435c>] print_address_description.constprop.0+0x34/0x2e0
>> [   44.505529]  [<000b0324d2f5464c>] print_report+0x44/0x138
>> [   44.505536]  [<000b0324d1383192>] kasan_report+0xc2/0x140
>> [   44.505543]  [<000b0324d2f52904>] special_mapping_close+0x9c/0xc8
>                                        ^^^^^^^^^^^^^^^^^^^^^
> Caused by
>
> 	[PATCH v2 1/4] mm: Add optional close() to struct vm_special_mapping
> 	https://lore.kernel.org/all/20240812082605.743814-1-mpe@ellerman.id.au/
>
> ?
>
>> +static void uprobe_clear_state(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
>> +{
>> +	struct xol_area *area = container_of(vma->vm_private_data, struct xol_area, xol_mapping);
>> +
>> +	mutex_lock(&delayed_uprobe_lock);
>> +	delayed_uprobe_remove(NULL, vma->vm_mm);
>> +	mutex_unlock(&delayed_uprobe_lock);
>> +
>> +	if (!area)
>> +		return;
>> +
>> +	put_page(area->pages[0]);
>> +	kfree(area->bitmap);
>> +	kfree(area);
>> +}
>> +
>>  static struct xol_area *__create_xol_area(unsigned long vaddr)
>>  {
>>  	struct mm_struct *mm = current->mm;
>> @@ -1481,6 +1500,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
>>
>>  	area->xol_mapping.name = "[uprobes]";
>>  	area->xol_mapping.fault = NULL;
>> +	area->xol_mapping.close = uprobe_clear_state;
>
> LGTM.
>
> but with or without this fix __create_xol_area() also needs
>
> 	area->xol_mapping.mremap = NULL;
>
> ?

I think the code should just use kzalloc() instead of kmalloc() so all
members are cleared. I noticed this while looking into the issue, because
the new close member of xol_mapping wasn't initialized. My plan was to
send that change as a separate patch, because it's not related to this
issue.
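
For illustration, the change amounts to roughly this in __create_xol_area()
(a sketch, assuming the area is currently allocated with plain kmalloc();
the actual patch is linked further down in the thread):

-	area = kmalloc(sizeof(*area), GFP_KERNEL);
+	area = kzalloc(sizeof(*area), GFP_KERNEL);

With kzalloc(), every vm_special_mapping member that __create_xol_area()
does not set explicitly (including the new ->close and the older ->mremap)
starts out as NULL instead of stale heap data.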
Linus Torvalds Sept. 3, 2024, 7:12 p.m. UTC | #4
On Tue, 3 Sept 2024 at 02:09, Oleg Nesterov <oleg@redhat.com> wrote:
>
> but with or without this fix __create_xol_area() also needs
>
>         area->xol_mapping.mremap = NULL;

I think the whole thing needs to be zeroed out.

It was always horribly buggy. The close thing just made it more
*obviously* buggy, because closing a vma is a lot more common than
mremap'ing it.

Either use kzalloc(), or do a proper initializer something like this:

-       area->xol_mapping.name = "[uprobes]";
-       area->xol_mapping.fault = NULL;
-       area->xol_mapping.pages = area->pages;
+       area->xol_mapping = (struct vm_special_mapping) {
+               .name = "[uprobes]",
+               .pages = area->pages,
+               .close = uprobe_clear_state,
+       };

which should initialize the struct vm_special_mapping fully.
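
(That works because members omitted from a designated initializer are
zero-initialized; a tiny standalone illustration, with made-up names:)

#include <assert.h>
#include <stddef.h>

struct desc {
	const char *name;
	void (*fault)(void);
	void (*mremap)(void);
	void (*close)(void);
};

int main(void)
{
	struct desc d;

	/*
	 * Every member not named here (->fault, ->mremap) is implicitly
	 * zero-initialized, unlike field-by-field assignment into
	 * uninitialized kmalloc()'ed memory.
	 */
	d = (struct desc) {
		.name  = "[example]",
		.close = NULL,
	};

	assert(d.fault == NULL && d.mremap == NULL);
	return 0;
}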

                     Linus
Sven Schnelle Sept. 3, 2024, 7:31 p.m. UTC | #5
Hi Linus,

Linus Torvalds <torvalds@linux-foundation.org> writes:

> On Tue, 3 Sept 2024 at 02:09, Oleg Nesterov <oleg@redhat.com> wrote:
>>
>> but with or without this fix __create_xol_area() also needs
>>
>>         area->xol_mapping.mremap = NULL;
>
> I think the whole thing needs to be zeroed out.
>
> It was always horribly buggy. The close thing just made it more
> *obviously* buggy, because closing a vma is a lot more common than
> mremap'ing it.
>
> Either use kzalloc(), or do a proper initializer something like this:

I sent a patch which does this today:

https://lore.kernel.org/all/20240903102313.3402529-1-svens@linux.ibm.com/
Oleg Nesterov Sept. 3, 2024, 7:32 p.m. UTC | #6
On 09/03, Linus Torvalds wrote:
>
> On Tue, 3 Sept 2024 at 02:09, Oleg Nesterov <oleg@redhat.com> wrote:
> >
> > but with or without this fix __create_xol_area() also needs
> >
> >         area->xol_mapping.mremap = NULL;
>
> I think the whole thing needs to be zeroed out.

Agreed,

> Either use kzalloc(),

This is what Sven did in

	[PATCH] uprobes: use kzalloc to allocate xol area
	https://lore.kernel.org/all/20240903102313.3402529-1-svens@linux.ibm.com/

Oleg.
Linus Torvalds Sept. 3, 2024, 7:34 p.m. UTC | #7
On Tue, 3 Sept 2024 at 12:31, Sven Schnelle <svens@linux.ibm.com> wrote:
>
> > Either use kzalloc(), or do a proper initializer something like this:
>
> I sent a patch which does this today:

Ack, looks good.

This should probably be backported independently of the new close()
series, since the mremap issue seems to be longstanding.

                     Linus
Michael Ellerman Sept. 4, 2024, 3:57 a.m. UTC | #8
Sven Schnelle <svens@linux.ibm.com> writes:
> Hi Michael,

Hi Sven,

> Sven Schnelle <svens@linux.ibm.com> writes:
>
>> The following KASAN splat was shown:
>>
>> [   44.505448] ==================================================================                                                                      20:37:27 [3421/145075]
>> [   44.505455] BUG: KASAN: slab-use-after-free in special_mapping_close+0x9c/0xc8
>> [   44.505471] Read of size 8 at addr 00000000868dac48 by task sh/1384
...
>> [..]
>
> As this has a dependency on your special mapping close series, do you
> want to carry that with your patches?

Andrew has my series in mm-stable, so I think this should go into mm as
well. I assume he will pick it up.

cheers
Oleg Nesterov Sept. 4, 2024, 10:03 a.m. UTC | #10
I didn't have time to write a full reply yesterday.

On 09/03, Linus Torvalds wrote:
>
> On Tue, 3 Sept 2024 at 02:09, Oleg Nesterov <oleg@redhat.com> wrote:
> >
> > but with or without this fix __create_xol_area() also needs
> >
> >         area->xol_mapping.mremap = NULL;
>
> I think the whole thing needs to be zeroed out.

Again, this is what Sven did and I agree with this fix for now.

> It was always horribly buggy. The close thing just made it more
> *obviously* buggy, because closing a vma is a lot more common than
> mremap'ing it.

Well, this code is very old; I don't think it was "always" buggy.
But at least today it is horribly ugly, and

> Either use kzalloc(), or do a proper initializer something like this:
>
> -       area->xol_mapping.name = "[uprobes]";
> -       area->xol_mapping.fault = NULL;
> -       area->xol_mapping.pages = area->pages;
> +       area->xol_mapping = (struct vm_special_mapping) {
> +               .name = "[uprobes]",
> +               .pages = area->pages,
> +               .close = uprobe_clear_state,
> +       };

either way the code is still ugly, imo.

How about the (untested) patch below?

I am not going to send this patch right now, it conflicts with the ongoing
xol_mapping.close changes, but what do you think?

We do not need to add an instance of vm_special_mapping into mm_struct;
a single instance (xol_mapping below) with .fault = xol_fault() is enough.
And with this change xol_area no longer needs "struct page *pages[2]"; it
can be turned into "struct page *page".

Oleg.
---

--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -101,7 +101,6 @@ struct xol_area {
 	atomic_t 			slot_count;	/* number of in-use slots */
 	unsigned long 			*bitmap;	/* 0 = free slot */
 
-	struct vm_special_mapping	xol_mapping;
 	struct page 			*pages[2];
 	/*
 	 * We keep the vma's vm_start rather than a pointer to the vma
@@ -1453,6 +1452,21 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 		set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags);
 }
 
+static vm_fault_t xol_fault(const struct vm_special_mapping *sm,
+			    struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct xol_area *area = vma->vm_mm->uprobes_state.xol_area;
+
+	vmf->page = area->pages[0];
+	get_page(vmf->page);
+	return 0;
+}
+
+static const struct vm_special_mapping xol_mapping = {
+	.name = "[uprobes]",
+	.fault = xol_fault,
+};
+
 /* Slot allocation for XOL */
 static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 {
@@ -1479,7 +1493,7 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 
 	vma = _install_special_mapping(mm, area->vaddr, PAGE_SIZE,
 				VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO,
-				&area->xol_mapping);
+				&xol_mapping);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto fail;
@@ -1518,9 +1532,6 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
 	if (!area->bitmap)
 		goto free_area;
 
-	area->xol_mapping.name = "[uprobes]";
-	area->xol_mapping.fault = NULL;
-	area->xol_mapping.pages = area->pages;
 	area->pages[0] = alloc_page(GFP_HIGHUSER);
 	if (!area->pages[0])
 		goto free_bitmap;
Andrew Morton Sept. 4, 2024, 9:26 p.m. UTC | #11
On Wed, 04 Sep 2024 13:57:13 +1000 Michael Ellerman <mpe@ellerman.id.au> wrote:

> Sven Schnelle <svens@linux.ibm.com> writes:
> > Hi Michael,
> 
> Hi Sven,
> 
> > Sven Schnelle <svens@linux.ibm.com> writes:
> >
> >> The following KASAN splat was shown:
> >>
> >> [   44.505448] ==================================================================                                                                      20:37:27 [3421/145075]
> >> [   44.505455] BUG: KASAN: slab-use-after-free in special_mapping_close+0x9c/0xc8
> >> [   44.505471] Read of size 8 at addr 00000000868dac48 by task sh/1384
> ...
> >> [..]
> >
> > As this has a dependency on your special mapping close series, do you
> > want to carry that with your patches?
> 
> Andrew has my series in mm-stable, so I think this should go into mm as
> well. I assume he will pick it up.

yup, thanks.  Added, with

Fixes: 223febc6e557 ("mm: add optional close() to struct vm_special_mapping")

It appears that peterz is scooping up Sven's "uprobes: use kzalloc to
allocate xol area".
Oleg Nesterov Sept. 11, 2024, 9:44 a.m. UTC | #12
On 09/03, Sven Schnelle wrote:
>
> +static void uprobe_clear_state(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
> +{
> +	struct xol_area *area = container_of(vma->vm_private_data, struct xol_area, xol_mapping);
> +
> +	mutex_lock(&delayed_uprobe_lock);
> +	delayed_uprobe_remove(NULL, vma->vm_mm);
> +	mutex_unlock(&delayed_uprobe_lock);
> +
> +	if (!area)
> +		return;
> +
> +	put_page(area->pages[0]);
> +	kfree(area->bitmap);
> +	kfree(area);
> +}
> +
>  static struct xol_area *__create_xol_area(unsigned long vaddr)
>  {
>  	struct mm_struct *mm = current->mm;
> @@ -1481,6 +1500,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
>
>  	area->xol_mapping.name = "[uprobes]";
>  	area->xol_mapping.fault = NULL;
> +	area->xol_mapping.close = uprobe_clear_state;

Ah, no, we can't do this :/

A malicious application can munmap() its "[uprobes]" vma and free
area/pages/bitmap. If this application hits the uprobe breakpoint after
that it will use the freed memory.

And no, "mm->uprobes_state.xol_area = NULL" in uprobe_clear_state() won't
help. Say, another thread can sleep on area.wq when munmap() is called.
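
For illustration, the scenario needs nothing more exotic than the probed
process unmapping its own xol vma; a hypothetical user-space sketch,
assuming a uprobe has already created the "[uprobes]" mapping:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	char line[512];
	FILE *maps = fopen("/proc/self/maps", "r");

	if (!maps)
		return 1;

	while (fgets(line, sizeof(line), maps)) {
		unsigned long start, end;

		if (!strstr(line, "[uprobes]"))
			continue;
		/*
		 * Unmapping the special mapping invokes ->close(), which
		 * frees the xol_area that other threads may still be using.
		 */
		if (sscanf(line, "%lx-%lx", &start, &end) == 2)
			munmap((void *)start, end - start);
	}
	fclose(maps);
	return 0;
}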

Sorry, I should have realized that immediately, but I didn't :/

Andrew, this is uprobes-use-vm_special_mapping-close-functionality.patch
in mm-stable

Oleg.
Oleg Nesterov Sept. 11, 2024, 9:57 a.m. UTC | #13
I guess VM_SEALED could help, but it depends on CONFIG_64BIT


On 09/11, Oleg Nesterov wrote:
>
> On 09/03, Sven Schnelle wrote:
> >
> > +static void uprobe_clear_state(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
> > +{
> > +	struct xol_area *area = container_of(vma->vm_private_data, struct xol_area, xol_mapping);
> > +
> > +	mutex_lock(&delayed_uprobe_lock);
> > +	delayed_uprobe_remove(NULL, vma->vm_mm);
> > +	mutex_unlock(&delayed_uprobe_lock);
> > +
> > +	if (!area)
> > +		return;
> > +
> > +	put_page(area->pages[0]);
> > +	kfree(area->bitmap);
> > +	kfree(area);
> > +}
> > +
> >  static struct xol_area *__create_xol_area(unsigned long vaddr)
> >  {
> >  	struct mm_struct *mm = current->mm;
> > @@ -1481,6 +1500,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
> >
> >  	area->xol_mapping.name = "[uprobes]";
> >  	area->xol_mapping.fault = NULL;
> > +	area->xol_mapping.close = uprobe_clear_state;
> 
> Ah, no, we can't do this :/
> 
> A malicious application can munmap() its "[uprobes]" vma and free
> area/pages/bitmap. If this application hits the uprobe breakpoint after
> that it will use the freed memory.
> 
> And no, "mm->uprobes_state.xol_area = NULL" in uprobe_clear_state() won't
> help. Say, another thread can sleep on area.wq when munmap() is called.
> 
> Sorry, I should have realized that immediately, but I didn't :/
> 
> Andrew, this is uprobes-use-vm_special_mapping-close-functionality.patch
> in mm-stable
> 
> Oleg.
Oleg Nesterov Sept. 11, 2024, 10:12 a.m. UTC | #14
Damn, sorry for the spam.

So I am going to send the patch from
https://lore.kernel.org/all/20240904095449.GA28781@redhat.com/

To me it looks like a good cleanup regardless, and unless I am totally
confused it should fix the problem with use-after-free.

Oleg.

On 09/11, Oleg Nesterov wrote:
>
> I guess VM_SEALED could help, but it depends on CONFIG_64BIT
>
>
> On 09/11, Oleg Nesterov wrote:
> >
> > On 09/03, Sven Schnelle wrote:
> > >
> > > +static void uprobe_clear_state(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
> > > +{
> > > +	struct xol_area *area = container_of(vma->vm_private_data, struct xol_area, xol_mapping);
> > > +
> > > +	mutex_lock(&delayed_uprobe_lock);
> > > +	delayed_uprobe_remove(NULL, vma->vm_mm);
> > > +	mutex_unlock(&delayed_uprobe_lock);
> > > +
> > > +	if (!area)
> > > +		return;
> > > +
> > > +	put_page(area->pages[0]);
> > > +	kfree(area->bitmap);
> > > +	kfree(area);
> > > +}
> > > +
> > >  static struct xol_area *__create_xol_area(unsigned long vaddr)
> > >  {
> > >  	struct mm_struct *mm = current->mm;
> > > @@ -1481,6 +1500,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
> > >
> > >  	area->xol_mapping.name = "[uprobes]";
> > >  	area->xol_mapping.fault = NULL;
> > > +	area->xol_mapping.close = uprobe_clear_state;
> >
> > Ah, no, we can't do this :/
> >
> > A malicious application can munmap() its "[uprobes]" vma and free
> > area/pages/bitmap. If this application hits the uprobe breakpoint after
> > that it will use the freed memory.
> >
> > And no, "mm->uprobes_state.xol_area = NULL" in uprobe_clear_state() won't
> > help. Say, another thread can sleep on area.wq when munmap() is called.
> >
> > Sorry, I should have realized that immediately, but I didn't :/
> >
> > Andrew, this is uprobes-use-vm_special_mapping-close-functionality.patch
> > in mm-stable
> >
> > Oleg.

Patch

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index f50df1fa93e7..2ae96c98d287 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -128,7 +128,6 @@  extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
 extern void uprobe_notify_resume(struct pt_regs *regs);
 extern bool uprobe_deny_signal(void);
 extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
-extern void uprobe_clear_state(struct mm_struct *mm);
 extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
 extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 30348f13d4a7..ab19a43a4dfc 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1463,6 +1463,25 @@  void * __weak arch_uprobe_trampoline(unsigned long *psize)
 	return &insn;
 }
 
+/*
+ * uprobe_clear_state - Free the area allocated for slots.
+ */
+static void uprobe_clear_state(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
+{
+	struct xol_area *area = container_of(vma->vm_private_data, struct xol_area, xol_mapping);
+
+	mutex_lock(&delayed_uprobe_lock);
+	delayed_uprobe_remove(NULL, vma->vm_mm);
+	mutex_unlock(&delayed_uprobe_lock);
+
+	if (!area)
+		return;
+
+	put_page(area->pages[0]);
+	kfree(area->bitmap);
+	kfree(area);
+}
+
 static struct xol_area *__create_xol_area(unsigned long vaddr)
 {
 	struct mm_struct *mm = current->mm;
@@ -1481,6 +1500,7 @@  static struct xol_area *__create_xol_area(unsigned long vaddr)
 
 	area->xol_mapping.name = "[uprobes]";
 	area->xol_mapping.fault = NULL;
+	area->xol_mapping.close = uprobe_clear_state;
 	area->xol_mapping.pages = area->pages;
 	area->pages[0] = alloc_page(GFP_HIGHUSER);
 	if (!area->pages[0])
@@ -1526,25 +1546,6 @@  static struct xol_area *get_xol_area(void)
 	return area;
 }
 
-/*
- * uprobe_clear_state - Free the area allocated for slots.
- */
-void uprobe_clear_state(struct mm_struct *mm)
-{
-	struct xol_area *area = mm->uprobes_state.xol_area;
-
-	mutex_lock(&delayed_uprobe_lock);
-	delayed_uprobe_remove(NULL, mm);
-	mutex_unlock(&delayed_uprobe_lock);
-
-	if (!area)
-		return;
-
-	put_page(area->pages[0]);
-	kfree(area->bitmap);
-	kfree(area);
-}
-
 void uprobe_start_dup_mmap(void)
 {
 	percpu_down_read(&dup_mmap_sem);
diff --git a/kernel/fork.c b/kernel/fork.c
index df8e4575ff01..ad0e16cf528b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1340,7 +1340,6 @@  static inline void __mmput(struct mm_struct *mm)
 {
 	VM_BUG_ON(atomic_read(&mm->mm_users));
 
-	uprobe_clear_state(mm);
 	exit_aio(mm);
 	ksm_exit(mm);
 	khugepaged_exit(mm); /* must run before exit_mmap */