mm/vmalloc: Remove WARN_ON_ONCE related to adjust_va_to_fit_type

Message ID 20230922062704epcms1p1722f24d4489a0435b339ce21db754ded@epcms1p1 (mailing list archive)
State New
Series mm/vmalloc: Remove WARN_ON_ONCE related to adjust_va_to_fit_type

Commit Message

Jaeseon Sim Sept. 22, 2023, 6:27 a.m. UTC
There's a panic issue as follows when doing alloc_vmap_area:

Kernel panic - not syncing: kernel: panic_on_warn set ...

page allocation failure: order:0, mode:0x800(GFP_NOWAIT)
Call Trace:
warn_alloc+0xf4/0x190
__alloc_pages_slowpath+0xe0c/0xffc
__alloc_pages+0x250/0x2d0
new_slab+0x17c/0x4e0
___slab_alloc+0x4e4/0x8a8
__slab_alloc+0x34/0x6c
kmem_cache_alloc+0x20c/0x2f0
adjust_va_to_fit_type
__alloc_vmap_area
alloc_vmap_area+0x298/0x7fc
__get_vm_area_node+0x10c/0x1b4
__vmalloc_node_range+0x19c/0x7c0

Commit 1b23ff80b399 ("mm/vmalloc: invoke classify_va_fit_type() in
adjust_va_to_fit_type()") moved classify_va_fit_type() into
adjust_va_to_fit_type() and used WARN_ON_ONCE() to handle the return
value of adjust_va_to_fit_type(), just as the return value of
classify_va_fit_type() had been handled before.

There is another path in adjust_va_to_fit_type() which can return
failure and which is already handled in alloc_vmap_area().
Remove the WARN_ON_ONCE() for this case.

Fixes: 45c62fc2897d ("mm/vmalloc: Remove WARN_ON_ONCE related to adjust_va_to_fit_type")
Signed-off-by: Jaeseon Sim <jason.sim@samsung.com>
---
 mm/vmalloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

-- 
2.17.1

Comments

Baoquan He Sept. 22, 2023, 9:34 a.m. UTC | #1
Hi Jaeseon,

On 09/22/23 at 03:27pm, Jaeseon Sim wrote:
> There's panic issue as follows when do alloc_vmap_area:
> 
> Kernel panic - not syncing: kernel: panic_on_warn set ...
> 
> page allocation failure: order:0, mode:0x800(GFP_NOWAIT)
> Call Trace:
> warn_alloc+0xf4/0x190
> __alloc_pages_slowpath+0xe0c/0xffc
> __alloc_pages+0x250/0x2d0
> new_slab+0x17c/0x4e0
> ___slab_alloc+0x4e4/0x8a8
> __slab_alloc+0x34/0x6c
> kmem_cache_alloc+0x20c/0x2f0
> adjust_va_to_fit_type
> __alloc_vmap_area
> alloc_vmap_area+0x298/0x7fc
> __get_vm_area_node+0x10c/0x1b4
> __vmalloc_node_range+0x19c/0x7c0
> 
> Commit 1b23ff80b399 ("mm/vmalloc: invoke classify_va_fit_type() in
> adjust_va_to_fit_type()") moved classify_va_fit_type() into
> adjust_va_to_fit_type() and used WARN_ON_ONCE() to handle return
> value of adjust_va_to_fit_type(), just as classify_va_fit_type()
> was handled.

I don't get what you are fixing. In commit 1b23ff80b399, we have
"if (WARN_ON_ONCE(type == NOTHING_FIT))", it's the same as the current
code. You set panic_on_warn, it will panic in old code before commit
1b23ff80b399. Isn't it an expected behaviour?

> 
> There is another path in adjust_va_to_fit_type() which could
> return failure and will be handled in alloc_vmap_area().
> Remove WARN_ON_ONCE() for this case.
> 
> Fixes: 45c62fc2897d ("mm/vmalloc: Remove WARN_ON_ONCE related to adjust_va_to_fit_type")

The commit id for Fixes tag is wrong.

> Signed-off-by: Jaeseon Sim <jason.sim@samsung.com>
> ---
>  mm/vmalloc.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index ef8599d394fd..4a82b6525d48 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -1522,7 +1522,7 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
>  
>          /* Update the free vmap_area. */
>          ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size);
> -        if (WARN_ON_ONCE(ret))
> +        if (ret)
>                  return vend;
>  
>  #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
> @@ -4143,7 +4143,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
>                  ret = adjust_va_to_fit_type(&free_vmap_area_root,
>                                              &free_vmap_area_list,
>                                              va, start, size);
> -                if (WARN_ON_ONCE(unlikely(ret)))
> +                if (unlikely(ret))
>                          /* It is a BUG(), but trigger recovery instead. */
>                          goto recovery;
>  
> -- 
> 2.17.1
>
Baoquan He Sept. 22, 2023, 9:42 a.m. UTC | #2
On 09/22/23 at 05:34pm, Baoquan He wrote:
> Hi Jaeseon,
> 
> On 09/22/23 at 03:27pm, Jaeseon Sim wrote:
> > There's panic issue as follows when do alloc_vmap_area:
> > 
> > Kernel panic - not syncing: kernel: panic_on_warn set ...
> > 
> > page allocation failure: order:0, mode:0x800(GFP_NOWAIT)
> > Call Trace:
> > warn_alloc+0xf4/0x190
> > __alloc_pages_slowpath+0xe0c/0xffc
> > __alloc_pages+0x250/0x2d0
> > new_slab+0x17c/0x4e0
> > ___slab_alloc+0x4e4/0x8a8
> > __slab_alloc+0x34/0x6c
> > kmem_cache_alloc+0x20c/0x2f0
> > adjust_va_to_fit_type
> > __alloc_vmap_area
> > alloc_vmap_area+0x298/0x7fc
> > __get_vm_area_node+0x10c/0x1b4
> > __vmalloc_node_range+0x19c/0x7c0
> > 
> > Commit 1b23ff80b399 ("mm/vmalloc: invoke classify_va_fit_type() in
> > adjust_va_to_fit_type()") moved classify_va_fit_type() into
> > adjust_va_to_fit_type() and used WARN_ON_ONCE() to handle return
> > value of adjust_va_to_fit_type(), just as classify_va_fit_type()
> > was handled.
> 
> I don't get what you are fixing. In commit 1b23ff80b399, we have
                                   ~~ s/In/Before/, typo
> "if (WARN_ON_ONCE(type == NOTHING_FIT))", it's the same as the current
> code. You set panic_on_warn, it will panic in old code before commit
> 1b23ff80b399. Isn't it an expected behaviour?
> 
> > 
> > There is another path in adjust_va_to_fit_type() which could
> > return failure and will be handled in alloc_vmap_area().
> > Remove WARN_ON_ONCE() for this case.
> > 
> > Fixes: 45c62fc2897d ("mm/vmalloc: Remove WARN_ON_ONCE related to adjust_va_to_fit_type")
> 
> The commit id for Fixes tag is wrong.
> 
> > Signed-off-by: Jaeseon Sim <jason.sim@samsung.com>
> > ---
> >  mm/vmalloc.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> > index ef8599d394fd..4a82b6525d48 100644
> > --- a/mm/vmalloc.c
> > +++ b/mm/vmalloc.c
> > @@ -1522,7 +1522,7 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
> >  
> >          /* Update the free vmap_area. */
> >          ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size);
> > -        if (WARN_ON_ONCE(ret))
> > +        if (ret)
> >                  return vend;
> >  
> >  #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
> > @@ -4143,7 +4143,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
> >                  ret = adjust_va_to_fit_type(&free_vmap_area_root,
> >                                              &free_vmap_area_list,
> >                                              va, start, size);
> > -                if (WARN_ON_ONCE(unlikely(ret)))
> > +                if (unlikely(ret))
> >                          /* It is a BUG(), but trigger recovery instead. */
> >                          goto recovery;
> >  
> > -- 
> > 2.17.1
> > 
>
Uladzislau Rezki Sept. 22, 2023, 1:41 p.m. UTC | #3
On Fri, Sep 22, 2023 at 03:27:04PM +0900, Jaeseon Sim wrote:
> There's panic issue as follows when do alloc_vmap_area:
> 
> Kernel panic - not syncing: kernel: panic_on_warn set ...
> 
> page allocation failure: order:0, mode:0x800(GFP_NOWAIT)
> Call Trace:
> warn_alloc+0xf4/0x190
> __alloc_pages_slowpath+0xe0c/0xffc
> __alloc_pages+0x250/0x2d0
> new_slab+0x17c/0x4e0
> ___slab_alloc+0x4e4/0x8a8
> __slab_alloc+0x34/0x6c
> kmem_cache_alloc+0x20c/0x2f0
> adjust_va_to_fit_type
> __alloc_vmap_area
> alloc_vmap_area+0x298/0x7fc
> __get_vm_area_node+0x10c/0x1b4
> __vmalloc_node_range+0x19c/0x7c0
> 
> Commit 1b23ff80b399 ("mm/vmalloc: invoke classify_va_fit_type() in
> adjust_va_to_fit_type()") moved classify_va_fit_type() into
> adjust_va_to_fit_type() and used WARN_ON_ONCE() to handle return
> value of adjust_va_to_fit_type(), just as classify_va_fit_type()
> was handled.
> 
> There is another path in adjust_va_to_fit_type() which could
> return failure and will be handled in alloc_vmap_area().
> Remove WARN_ON_ONCE() for this case.
> 
> Fixes: 45c62fc2897d ("mm/vmalloc: Remove WARN_ON_ONCE related to adjust_va_to_fit_type")
>
This is what I do not follow. The stack shows the warning in the __alloc_pages() path.

What does this patch fix?

--
Uladzislau Rezki
Jaeseon Sim Sept. 25, 2023, 10:51 a.m. UTC | #4
> On 09/22/23 at 05:34pm, Baoquan He wrote:
> > Hi Jaeseon,
Hello Baoquan,
> > 
> > On 09/22/23 at 03:27pm, Jaeseon Sim wrote:
> > > There's panic issue as follows when do alloc_vmap_area:
> > > 
> > > Kernel panic - not syncing: kernel: panic_on_warn set ...
> > > 
> > > page allocation failure: order:0, mode:0x800(GFP_NOWAIT)
> > > Call Trace:
> > > warn_alloc+0xf4/0x190
> > > __alloc_pages_slowpath+0xe0c/0xffc
> > > __alloc_pages+0x250/0x2d0
> > > new_slab+0x17c/0x4e0
> > > ___slab_alloc+0x4e4/0x8a8
> > > __slab_alloc+0x34/0x6c
> > > kmem_cache_alloc+0x20c/0x2f0
> > > adjust_va_to_fit_type
> > > __alloc_vmap_area
> > > alloc_vmap_area+0x298/0x7fc
> > > __get_vm_area_node+0x10c/0x1b4
> > > __vmalloc_node_range+0x19c/0x7c0

To Uladzislau,
Sorry. The path is as below.

Call trace:
 alloc_vmap_area+0x298/0x7fc
 __get_vm_area_node+0x10c/0x1b4
 __vmalloc_node_range+0x19c/0x7c0
 dup_task_struct+0x1b8/0x3b0
 copy_process+0x170/0xc40

> > > 
> > > Commit 1b23ff80b399 ("mm/vmalloc: invoke classify_va_fit_type() in
> > > adjust_va_to_fit_type()") moved classify_va_fit_type() into
> > > adjust_va_to_fit_type() and used WARN_ON_ONCE() to handle return
> > > value of adjust_va_to_fit_type(), just as classify_va_fit_type()
> > > was handled.
> > 
> > I don't get what you are fixing. In commit 1b23ff80b399, we have
>                                    ~~ s/In/Before/, typo
> > "if (WARN_ON_ONCE(type == NOTHING_FIT))", it's the same as the current
> > code. You set panic_on_warn, it will panic in old code before commit
> > 1b23ff80b399. Isn't it an expected behaviour?
There is a call path which didn't panic in the old code, but does in the current one.

static __always_inline int adjust_va_to_fit_type()

} else if (type == NE_FIT_TYPE) {
	lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
	if (!lva)
		return -1;

If the above path is taken, the retry path should be triggered at alloc_vmap_area().
But it is currently unable to do so.
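
For context, here is a simplified sketch of the retry/overflow handling in
alloc_vmap_area() that this failure is supposed to fall back to (based on the
v6.1 sources, not an exact excerpt; purge_vmap_area_lazy() is assumed to be
the purge helper used on the overflow path):

<snip>
retry:
	preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node);
	addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
		size, align, vstart, vend);
	spin_unlock(&free_vmap_area_lock);

	/*
	 * A failed allocation returns "vend", which is supposed to take
	 * the overflow path: purge lazily freed areas and retry once.
	 */
	if (unlikely(addr == vend))
		goto overflow;
	...
overflow:
	if (!purged) {
		purge_vmap_area_lazy();
		purged = 1;
		goto retry;
	}
<snip>

With panic_on_warn set, the WARN_ON_ONCE() in __alloc_vmap_area() panics
before "vend" ever gets back to this code.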

> > 
> > > 
> > > There is another path in adjust_va_to_fit_type() which could
> > > return failure and will be handled in alloc_vmap_area().
> > > Remove WARN_ON_ONCE() for this case.
> > > 
> > > Fixes: 45c62fc2897d ("mm/vmalloc: Remove WARN_ON_ONCE related to adjust_va_to_fit_type")
> > 
> > The commit id for Fixes tag is wrong.
> > 
> > > Signed-off-by: Jaeseon Sim <jason.sim@samsung.com>
> > > ---
> > >  mm/vmalloc.c | 4 ++--
> > >  1 file changed, 2 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> > > index ef8599d394fd..4a82b6525d48 100644
> > > --- a/mm/vmalloc.c
> > > +++ b/mm/vmalloc.c
> > > @@ -1522,7 +1522,7 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
> > >  
> > >          /* Update the free vmap_area. */
> > >          ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size);
> > > -        if (WARN_ON_ONCE(ret))
> > > +        if (ret)
> > >                  return vend;
> > >  
> > >  #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
> > > @@ -4143,7 +4143,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
> > >                  ret = adjust_va_to_fit_type(&free_vmap_area_root,
> > >                                              &free_vmap_area_list,
> > >                                              va, start, size);
> > > -                if (WARN_ON_ONCE(unlikely(ret)))
> > > +                if (unlikely(ret))
> > >                          /* It is a BUG(), but trigger recovery instead. */
> > >                          goto recovery;
> > >  
> > > -- 
> > > 2.17.1
> > > 
> > 
>
Uladzislau Rezki Sept. 25, 2023, 1:40 p.m. UTC | #5
On Mon, Sep 25, 2023 at 07:51:54PM +0900, Jaeseon Sim wrote:
> > On 09/22/23 at 05:34pm, Baoquan He wrote:
> > > Hi Jaeseon,
> Hello Baoquan,
> > > 
> > > On 09/22/23 at 03:27pm, Jaeseon Sim wrote:
> > > > There's panic issue as follows when do alloc_vmap_area:
> > > > 
> > > > Kernel panic - not syncing: kernel: panic_on_warn set ...
> > > > 
> > > > page allocation failure: order:0, mode:0x800(GFP_NOWAIT)
> > > > Call Trace:
> > > > warn_alloc+0xf4/0x190
> > > > __alloc_pages_slowpath+0xe0c/0xffc
> > > > __alloc_pages+0x250/0x2d0
> > > > new_slab+0x17c/0x4e0
> > > > ___slab_alloc+0x4e4/0x8a8
> > > > __slab_alloc+0x34/0x6c
> > > > kmem_cache_alloc+0x20c/0x2f0
> > > > adjust_va_to_fit_type
> > > > __alloc_vmap_area
> > > > alloc_vmap_area+0x298/0x7fc
> > > > __get_vm_area_node+0x10c/0x1b4
> > > > __vmalloc_node_range+0x19c/0x7c0
> 
> To Uladzislau,
> Sorry. The path is as below.
> 
> Call trace:
>  alloc_vmap_area+0x298/0x7fc
>  __get_vm_area_node+0x10c/0x1b4
>  __vmalloc_node_range+0x19c/0x7c0
>  dup_task_struct+0x1b8/0x3b0
>  copy_process+0x170/0xc40
> 
> > > > 
> > > > Commit 1b23ff80b399 ("mm/vmalloc: invoke classify_va_fit_type() in
> > > > adjust_va_to_fit_type()") moved classify_va_fit_type() into
> > > > adjust_va_to_fit_type() and used WARN_ON_ONCE() to handle return
> > > > value of adjust_va_to_fit_type(), just as classify_va_fit_type()
> > > > was handled.
> > > 
> > > I don't get what you are fixing. In commit 1b23ff80b399, we have
> >                                    ~~ s/In/Before/, typo
> > > "if (WARN_ON_ONCE(type == NOTHING_FIT))", it's the same as the current
> > > code. You set panic_on_warn, it will panic in old code before commit
> > > 1b23ff80b399. Isn't it an expected behaviour?
> There is a call path which didn't panic in old code, but does on the current.
> 
> static __always_inline int adjust_va_to_fit_type()
> 
> } else if (type == NE_FIT_TYPE) {
> 	lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
> 	if (!lva)
> 		return -1;
> 
>
We do not have the above code anymore:

<snip>
commit 82dd23e84be3ead53b6d584d836f51852d1096e6
Author: Uladzislau Rezki (Sony) <urezki@gmail.com>
Date:   Thu Jul 11 20:58:57 2019 -0700

    mm/vmalloc.c: preload a CPU with one object for split purpose

<snip>

Which kernel are you testing?

Thanks!

--
Uladzislau Rezki
Jaeseon Sim Sept. 26, 2023, 5:21 a.m. UTC | #6
> On Mon, Sep 25, 2023 at 07:51:54PM +0900, Jaeseon Sim wrote:
> > > On 09/22/23 at 05:34pm, Baoquan He wrote:
> > > > Hi Jaeseon,
> > Hello Baoquan,
> > > > 
> > > > On 09/22/23 at 03:27pm, Jaeseon Sim wrote:
> > > > > There's panic issue as follows when do alloc_vmap_area:
> > > > > 
> > > > > Kernel panic - not syncing: kernel: panic_on_warn set ...
> > > > > 
> > > > > page allocation failure: order:0, mode:0x800(GFP_NOWAIT)
> > > > > Call Trace:
> > > > > warn_alloc+0xf4/0x190
> > > > > __alloc_pages_slowpath+0xe0c/0xffc
> > > > > __alloc_pages+0x250/0x2d0
> > > > > new_slab+0x17c/0x4e0
> > > > > ___slab_alloc+0x4e4/0x8a8
> > > > > __slab_alloc+0x34/0x6c
> > > > > kmem_cache_alloc+0x20c/0x2f0
> > > > > adjust_va_to_fit_type
> > > > > __alloc_vmap_area
> > > > > alloc_vmap_area+0x298/0x7fc
> > > > > __get_vm_area_node+0x10c/0x1b4
> > > > > __vmalloc_node_range+0x19c/0x7c0
> > 
> > To Uladzislau,
> > Sorry. The path is as below.
> > 
> > Call trace:
> >  alloc_vmap_area+0x298/0x7fc
> >  __get_vm_area_node+0x10c/0x1b4
> >  __vmalloc_node_range+0x19c/0x7c0
> >  dup_task_struct+0x1b8/0x3b0
> >  copy_process+0x170/0xc40
> > 
> > > > > 
> > > > > Commit 1b23ff80b399 ("mm/vmalloc: invoke classify_va_fit_type() in
> > > > > adjust_va_to_fit_type()") moved classify_va_fit_type() into
> > > > > adjust_va_to_fit_type() and used WARN_ON_ONCE() to handle return
> > > > > value of adjust_va_to_fit_type(), just as classify_va_fit_type()
> > > > > was handled.
> > > > 
> > > > I don't get what you are fixing. In commit 1b23ff80b399, we have
> > >                                    ~~ s/In/Before/, typo
> > > > "if (WARN_ON_ONCE(type == NOTHING_FIT))", it's the same as the current
> > > > code. You set panic_on_warn, it will panic in old code before commit
> > > > 1b23ff80b399. Isn't it an expected behaviour?
> > There is a call path which didn't panic in old code, but does on the current.
> > 
> > static __always_inline int adjust_va_to_fit_type()
> > 
> > } else if (type == NE_FIT_TYPE) {
> > 	lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
> > 	if (!lva)
> > 		return -1;
> > 
> >
> We do not have above code anymore:
Sorry, I tried to say it in a simplified way and it caused a misunderstanding.

<snip>
static __always_inline int
adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
		      struct vmap_area *va, unsigned long nva_start_addr,
		      unsigned long size)

	} else if (type == NE_FIT_TYPE) {
		/*
		 * Split no edge of fit VA.
		 *
		 *     |       |
		 *   L V  NVA  V R
		 * |---|-------|---|
		 */
		lva = __this_cpu_xchg(ne_fit_preload_node, NULL);
		if (unlikely(!lva)) {
			/*
			 * For percpu allocator we do not do any pre-allocation
			 * and leave it as it is. The reason is it most likely
			 * never ends up with NE_FIT_TYPE splitting. In case of
			 * percpu allocations offsets and sizes are aligned to
			 * fixed align request, i.e. RE_FIT_TYPE and FL_FIT_TYPE
			 * are its main fitting cases.
			 *
			 * There are a few exceptions though, as an example it is
			 * a first allocation (early boot up) when we have "one"
			 * big free space that has to be split.
			 *
			 * Also we can hit this path in case of regular "vmap"
			 * allocations, if "this" current CPU was not preloaded.
			 * See the comment in alloc_vmap_area() why. If so, then
			 * GFP_NOWAIT is used instead to get an extra object for
			 * split purpose. That is rare and most time does not
			 * occur.
			 *
			 * What happens if an allocation gets failed. Basically,
			 * an "overflow" path is triggered to purge lazily freed
			 * areas to free some memory, then, the "retry" path is
			 * triggered to repeat one more time. See more details
			 * in alloc_vmap_area() function.
			 */
			lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
			if (!lva)
				return -1;
		}
<snip>

The above allocation failure will hit the WARN_ON_ONCE() in the current kernel.
Should it be handled by alloc_vmap_area(), as you described in the comment?

Thanks!
Jaeseon

> 
> <snip>
> commit 82dd23e84be3ead53b6d584d836f51852d1096e6
> Author: Uladzislau Rezki (Sony) <urezki@gmail.com>
> Date:   Thu Jul 11 20:58:57 2019 -0700
> 
>     mm/vmalloc.c: preload a CPU with one object for split purpose
> 
> <snip>
> 
> Which kernel are you testing?

I'm currently testing v6.1. 
The panic occurred during a power on/off test.
> 
> Thanks!
> 
> --
> Uladzislau Rezki
Uladzislau Rezki Sept. 26, 2023, 6:05 a.m. UTC | #7
> > We do not have above code anymore:
> Sorry, I tried to say it in a simplified way and it caused a misunderstanding.
> 
> <snip>
> static __always_inline int
> adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
> 		      struct vmap_area *va, unsigned long nva_start_addr,
> 		      unsigned long size)
> 
> 	} else if (type == NE_FIT_TYPE) {
> 		/*
> 		 * Split no edge of fit VA.
> 		 *
> 		 *     |       |
> 		 *   L V  NVA  V R
> 		 * |---|-------|---|
> 		 */
> 		lva = __this_cpu_xchg(ne_fit_preload_node, NULL);
> 		if (unlikely(!lva)) {
> 			/*
> 			 * For percpu allocator we do not do any pre-allocation
> 			 * and leave it as it is. The reason is it most likely
> 			 * never ends up with NE_FIT_TYPE splitting. In case of
> 			 * percpu allocations offsets and sizes are aligned to
> 			 * fixed align request, i.e. RE_FIT_TYPE and FL_FIT_TYPE
> 			 * are its main fitting cases.
> 			 *
> 			 * There are a few exceptions though, as an example it is
> 			 * a first allocation (early boot up) when we have "one"
> 			 * big free space that has to be split.
> 			 *
> 			 * Also we can hit this path in case of regular "vmap"
> 			 * allocations, if "this" current CPU was not preloaded.
> 			 * See the comment in alloc_vmap_area() why. If so, then
> 			 * GFP_NOWAIT is used instead to get an extra object for
> 			 * split purpose. That is rare and most time does not
> 			 * occur.
> 			 *
> 			 * What happens if an allocation gets failed. Basically,
> 			 * an "overflow" path is triggered to purge lazily freed
> 			 * areas to free some memory, then, the "retry" path is
> 			 * triggered to repeat one more time. See more details
> 			 * in alloc_vmap_area() function.
> 			 */
> 			lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
> 			if (!lva)
> 				return -1;
> 		}
> <snip>
> 
> Above allocation fail will meet WARN_ON_ONCE in the current kernel now.
> Should It be handled by alloc_vmap_area()?, as you described in a comment.
> 
WARN_ON_ONCE() is a warning and not a panic, though your kernel config
treats it as a panic. Right, we go on the retry path and we can remove
the warning only for the GFP_NOWAIT alloc error. On the other hand we
should still have the possibility to trigger a warning if an allocation
is not successful: no vmap space, or a low memory condition and thus no
physical memory.

> 
> > 
> > <snip>
> > commit 82dd23e84be3ead53b6d584d836f51852d1096e6
> > Author: Uladzislau Rezki (Sony) <urezki@gmail.com>
> > Date:   Thu Jul 11 20:58:57 2019 -0700
> > 
> >     mm/vmalloc.c: preload a CPU with one object for split purpose
> > 
> > <snip>
> > 
> > Which kernel are you testing?
> 
> I'm currently testing v6.1. 
> The panic occurred during power on/off test.
>
Could you please describe in more detail your test sequence, setup and HW?

--
Uladzislau Rezki
Jaeseon Sim Sept. 26, 2023, 12:05 p.m. UTC | #8
> > > We do not have above code anymore:
> > Sorry, I tried to say it in a simplified way and it caused a misunderstanding.
> > 
> > <snip>
> > static __always_inline int
> > adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
> > 		      struct vmap_area *va, unsigned long nva_start_addr,
> > 		      unsigned long size)
> > 
> > 	} else if (type == NE_FIT_TYPE) {
> > 		/*
> > 		 * Split no edge of fit VA.
> > 		 *
> > 		 *     |       |
> > 		 *   L V  NVA  V R
> > 		 * |---|-------|---|
> > 		 */
> > 		lva = __this_cpu_xchg(ne_fit_preload_node, NULL);
> > 		if (unlikely(!lva)) {
> > 			/*
> > 			 * For percpu allocator we do not do any pre-allocation
> > 			 * and leave it as it is. The reason is it most likely
> > 			 * never ends up with NE_FIT_TYPE splitting. In case of
> > 			 * percpu allocations offsets and sizes are aligned to
> > 			 * fixed align request, i.e. RE_FIT_TYPE and FL_FIT_TYPE
> > 			 * are its main fitting cases.
> > 			 *
> > 			 * There are a few exceptions though, as an example it is
> > 			 * a first allocation (early boot up) when we have "one"
> > 			 * big free space that has to be split.
> > 			 *
> > 			 * Also we can hit this path in case of regular "vmap"
> > 			 * allocations, if "this" current CPU was not preloaded.
> > 			 * See the comment in alloc_vmap_area() why. If so, then
> > 			 * GFP_NOWAIT is used instead to get an extra object for
> > 			 * split purpose. That is rare and most time does not
> > 			 * occur.
> > 			 *
> > 			 * What happens if an allocation gets failed. Basically,
> > 			 * an "overflow" path is triggered to purge lazily freed
> > 			 * areas to free some memory, then, the "retry" path is
> > 			 * triggered to repeat one more time. See more details
> > 			 * in alloc_vmap_area() function.
> > 			 */
> > 			lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
> > 			if (!lva)
> > 				return -1;
> > 		}
> > <snip>
> > 
> > Above allocation fail will meet WARN_ON_ONCE in the current kernel now.
> > Should It be handled by alloc_vmap_area()?, as you described in a comment.
> > 
> WARN_ONCE_ONCE() is a warning and not a panic, though your kernel config
> considers it as a panic. Right, we go on retry path and we can remove

Right, only in the case where panic_on_warn is enabled.

> the warning only for GFP_NOWAIT-alloc-error. From the other hand we
> should still have possibility to trigger a warning if an allocation
> is not successful: no vmap space or low memory condition, thus no
> physical memory.

Yes, but a GFP_NOWAIT alloc error can easily occur on a low-memory device.
How about changing the fix as below?

<snip>
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1468,6 +1468,7 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
                 */
                va->va_start = nva_start_addr + size;
        } else {
+               WARN_ON_ONCE(1);
                return -1;
        }
 
@@ -1522,7 +1523,7 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
 
        /* Update the free vmap_area. */
        ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size);
-       if (WARN_ON_ONCE(ret))
+       if (ret)
                return vend;
 
 #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
@@ -4143,7 +4144,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
                ret = adjust_va_to_fit_type(&free_vmap_area_root,
                                            &free_vmap_area_list,
                                            va, start, size);
-               if (WARN_ON_ONCE(unlikely(ret)))
+               if (unlikely(ret))
                        /* It is a BUG(), but trigger recovery instead. */
                        goto recovery;
 
<snip>
It will WARN_ON_ONCE() only if classify_va_fit_type() results in "(type == NOTHING_FIT)".

> 
> > 
> > > 
> > > <snip>
> > > commit 82dd23e84be3ead53b6d584d836f51852d1096e6
> > > Author: Uladzislau Rezki (Sony) <urezki@gmail.com>
> > > Date:   Thu Jul 11 20:58:57 2019 -0700
> > > 
> > >     mm/vmalloc.c: preload a CPU with one object for split purpose
> > > 
> > > <snip>
> > > 
> > > Which kernel are you testing?
> > 
> > I'm currently testing v6.1. 
> > The panic occurred during power on/off test.
> >
> Could you please describe in more detail your test sequence, setup and HW?

I'm testing on a Samsung Exynos arm64 board with the AOSP platform, kernel v6.1.
I did a power on/off test on hundreds of devices for a week and
the same issue reproduced several times.

Thanks
Jaeseon

> 
> --
> Uladzislau Rezki
Uladzislau Rezki Sept. 26, 2023, 12:35 p.m. UTC | #9
On Tue, Sep 26, 2023 at 09:05:49PM +0900, Jaeseon Sim wrote:
> > > > We do not have above code anymore:
> > > Sorry, I tried to say it in a simplified way and it caused a misunderstanding.
> > > 
> > > <snip>
> > > static __always_inline int
> > > adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
> > > 		      struct vmap_area *va, unsigned long nva_start_addr,
> > > 		      unsigned long size)
> > > 
> > > 	} else if (type == NE_FIT_TYPE) {
> > > 		/*
> > > 		 * Split no edge of fit VA.
> > > 		 *
> > > 		 *     |       |
> > > 		 *   L V  NVA  V R
> > > 		 * |---|-------|---|
> > > 		 */
> > > 		lva = __this_cpu_xchg(ne_fit_preload_node, NULL);
> > > 		if (unlikely(!lva)) {
> > > 			/*
> > > 			 * For percpu allocator we do not do any pre-allocation
> > > 			 * and leave it as it is. The reason is it most likely
> > > 			 * never ends up with NE_FIT_TYPE splitting. In case of
> > > 			 * percpu allocations offsets and sizes are aligned to
> > > 			 * fixed align request, i.e. RE_FIT_TYPE and FL_FIT_TYPE
> > > 			 * are its main fitting cases.
> > > 			 *
> > > 			 * There are a few exceptions though, as an example it is
> > > 			 * a first allocation (early boot up) when we have "one"
> > > 			 * big free space that has to be split.
> > > 			 *
> > > 			 * Also we can hit this path in case of regular "vmap"
> > > 			 * allocations, if "this" current CPU was not preloaded.
> > > 			 * See the comment in alloc_vmap_area() why. If so, then
> > > 			 * GFP_NOWAIT is used instead to get an extra object for
> > > 			 * split purpose. That is rare and most time does not
> > > 			 * occur.
> > > 			 *
> > > 			 * What happens if an allocation gets failed. Basically,
> > > 			 * an "overflow" path is triggered to purge lazily freed
> > > 			 * areas to free some memory, then, the "retry" path is
> > > 			 * triggered to repeat one more time. See more details
> > > 			 * in alloc_vmap_area() function.
> > > 			 */
> > > 			lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
> > > 			if (!lva)
> > > 				return -1;
> > > 		}
> > > <snip>
> > > 
> > > Above allocation fail will meet WARN_ON_ONCE in the current kernel now.
> > > Should It be handled by alloc_vmap_area()?, as you described in a comment.
> > > 
> > WARN_ONCE_ONCE() is a warning and not a panic, though your kernel config
> > considers it as a panic. Right, we go on retry path and we can remove
> 
> Right, only in case panic_on_warn is enabled..
> 
> > the warning only for GFP_NOWAIT-alloc-error. From the other hand we
> > should still have possibility to trigger a warning if an allocation
> > is not successful: no vmap space or low memory condition, thus no
> > physical memory.
> 
> Yes, but GFP_NOWAIT-alloc-error can easily occur for low-memory device.
>
Agree. You are really in a low memory condition. We end up here only if
pre-loading also has not succeeded, i.e. GFP_KERNEL also fails.

But I agree with you, we should "improve the warning" because we drain
and repeat.
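
(For reference, the pre-loading mentioned above is done by
preload_this_cpu_lock() right before __alloc_vmap_area() is called. A rough
sketch of it, paraphrased from the v6.1 sources rather than quoted exactly:)

<snip>
static void
preload_this_cpu_lock(spinlock_t *lock, gfp_t gfp_mask, int node)
{
	struct vmap_area *va = NULL;

	/*
	 * Preload one extra vmap_area object for a possible NE_FIT_TYPE
	 * split, using the caller's gfp_mask (typically GFP_KERNEL), so
	 * that the GFP_NOWAIT fallback in adjust_va_to_fit_type() is
	 * rarely needed.
	 */
	if (!this_cpu_read(ne_fit_preload_node))
		va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);

	spin_lock(lock);

	/* Another task may have preloaded this CPU meanwhile. */
	if (va && __this_cpu_cmpxchg(ne_fit_preload_node, NULL, va))
		kmem_cache_free(vmap_area_cachep, va);
}
<snip>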

> How about changing fix as below?:
> 
> <snip>
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -1468,6 +1468,7 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
>                  */
>                 va->va_start = nva_start_addr + size;
>         } else {
> +               WARN_ON_ONCE(1);
>                 return -1;
>         }
>  
> @@ -1522,7 +1523,7 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
>  
>         /* Update the free vmap_area. */
>         ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size);
> -       if (WARN_ON_ONCE(ret))
> +       if (ret)
>                 return vend;
>  
>  #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
> @@ -4143,7 +4144,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
>                 ret = adjust_va_to_fit_type(&free_vmap_area_root,
>                                             &free_vmap_area_list,
>                                             va, start, size);
> -               if (WARN_ON_ONCE(unlikely(ret)))
> +               if (unlikely(ret))
>                         /* It is a BUG(), but trigger recovery instead. */
>                         goto recovery;
>  
> <snip>
> It will WARN_ONCE_ONCE() only if classify_va_fit_type() is "(type == NOTHING_FIT)".
> 
This is good but I think it should be improved further. We need to
understand from the warning when there is no memory and when there is
no vmap space, so:

- if NOTHING_FIT, we should WARN() for sure;
- Second place, in pcpu_get_vm_areas(), we do not use NE_FIT. Only in
  the beginning after boot, but potentially we can trigger -ENOMEM and
  we should warn in this case. Otherwise you just hide it;
- And last one, if after repeating we still do not manage to allocate.

--
Uladzislau Rezki
Uladzislau Rezki Sept. 27, 2023, 11:49 a.m. UTC | #10
> > Yes, but GFP_NOWAIT-alloc-error can easily occur for low-memory device.
> >
> Agree. You are really in a low memory condition. We end up here only if
> pre-loading also has not succeeded, i.e. GFP_KERNEL also fails.
> 
> But i agree with you, we should "improve the warning" because we drain
> and repeat.
> 
> > How about changing fix as below?:
> > 
> > <snip>
> > --- a/mm/vmalloc.c
> > +++ b/mm/vmalloc.c
> > @@ -1468,6 +1468,7 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
> >                  */
> >                 va->va_start = nva_start_addr + size;
> >         } else {
> > +               WARN_ON_ONCE(1);
> >                 return -1;
> >         }
> >  
> > @@ -1522,7 +1523,7 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
> >  
> >         /* Update the free vmap_area. */
> >         ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size);
> > -       if (WARN_ON_ONCE(ret))
> > +       if (ret)
> >                 return vend;
> >  
> >  #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
> > @@ -4143,7 +4144,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
> >                 ret = adjust_va_to_fit_type(&free_vmap_area_root,
> >                                             &free_vmap_area_list,
> >                                             va, start, size);
> > -               if (WARN_ON_ONCE(unlikely(ret)))
> > +               if (unlikely(ret))
> >                         /* It is a BUG(), but trigger recovery instead. */
> >                         goto recovery;
> >  
> > <snip>
> > It will WARN_ONCE_ONCE() only if classify_va_fit_type() is "(type == NOTHING_FIT)".
> > 
> This is good but i think it should be improved further. We need to
> understand from the warning when no memory and when there is no a
> vmap space, so:
> 
> - if NOTHING_FIT, we should WARN() for sure;
> - Second place in the pcpu_get_vm_area(), we do not use NE_FIT. Only in
>   the begging after boot, but potentially we can trigger -ENOMEM and we
>   should warn in this case. Otherwise you just hide it;
> - And last one if after repeating we still do not manage to allocate.
> 

We should understand the reason for the failure. I think the error
handling should be improved. Something like:

<snip>
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ef8599d394fd..03a36921a3fc 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1454,7 +1454,7 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
 			 */
 			lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
 			if (!lva)
-				return -1;
+				return -ENOMEM;
 		}
 
 		/*
@@ -1468,7 +1468,7 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
 		 */
 		va->va_start = nva_start_addr + size;
 	} else {
-		return -1;
+		return -EINVAL;
 	}
 
 	if (type != FL_FIT_TYPE) {
@@ -1488,7 +1488,8 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
 static __always_inline unsigned long
 __alloc_vmap_area(struct rb_root *root, struct list_head *head,
 	unsigned long size, unsigned long align,
-	unsigned long vstart, unsigned long vend)
+	unsigned long vstart, unsigned long vend,
+	int *error)
 {
 	bool adjust_search_size = true;
 	unsigned long nva_start_addr;
@@ -1508,8 +1509,10 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
 		adjust_search_size = false;
 
 	va = find_vmap_lowest_match(root, size, align, vstart, adjust_search_size);
-	if (unlikely(!va))
+	if (unlikely(!va)) {
+		*error = -ENOENT;
 		return vend;
+	}
 
 	if (va->va_start > vstart)
 		nva_start_addr = ALIGN(va->va_start, align);
@@ -1517,13 +1520,17 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
 		nva_start_addr = ALIGN(vstart, align);
 
 	/* Check the "vend" restriction. */
-	if (nva_start_addr + size > vend)
+	if (nva_start_addr + size > vend) {
+		*error = -ERANGE;
 		return vend;
+	}
 
 	/* Update the free vmap_area. */
 	ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size);
-	if (WARN_ON_ONCE(ret))
+	if (ret) {
+		*error = ret;
 		return vend;
+	}
 
 #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
 	find_vmap_lowest_match_check(root, head, size, align);
@@ -1589,7 +1596,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 	unsigned long freed;
 	unsigned long addr;
 	int purged = 0;
-	int ret;
+	int ret, error;
 
 	if (unlikely(!size || offset_in_page(size) || !is_power_of_2(align)))
 		return ERR_PTR(-EINVAL);
@@ -1613,7 +1620,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 retry:
 	preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node);
 	addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
-		size, align, vstart, vend);
+		size, align, vstart, vend, &error);
 	spin_unlock(&free_vmap_area_lock);
 
 	trace_alloc_vmap_area(addr, size, align, vstart, vend, addr == vend);
@@ -1662,8 +1669,9 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 	}
 
 	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
-		pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
-			size);
+		pr_warn("vmap allocation for size %lu failed: "
+			"use vmalloc=<size> to increase size, errno: (%d)\n",
+			size, error);
 
 	kmem_cache_free(vmap_area_cachep, va);
 	return ERR_PTR(-EBUSY);
@@ -4143,9 +4151,10 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 		ret = adjust_va_to_fit_type(&free_vmap_area_root,
 					    &free_vmap_area_list,
 					    va, start, size);
-		if (WARN_ON_ONCE(unlikely(ret)))
-			/* It is a BUG(), but trigger recovery instead. */
+		if (unlikely(ret)) {
+			WARN_ONCE(1, "%s error: errno (%d)\n", __func__, ret);
 			goto recovery;
+		}
 
 		/* Allocated area. */
 		va = vas[area];
<snip>

Any thoughts?

--
Uladzislau Rezki
Baoquan He Sept. 27, 2023, 1:33 p.m. UTC | #11
On 09/27/23 at 01:49pm, Uladzislau Rezki wrote:
> > > Yes, but GFP_NOWAIT-alloc-error can easily occur for low-memory device.
> > >
> > Agree. You are really in a low memory condition. We end up here only if
> > pre-loading also has not succeeded, i.e. GFP_KERNEL also fails.
> > 
> > But i agree with you, we should "improve the warning" because we drain
> > and repeat.
> > 
> > > How about changing fix as below?:
> > > 
> > > <snip>
> > > --- a/mm/vmalloc.c
> > > +++ b/mm/vmalloc.c
> > > @@ -1468,6 +1468,7 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
> > >                  */
> > >                 va->va_start = nva_start_addr + size;
> > >         } else {
> > > +               WARN_ON_ONCE(1);
> > >                 return -1;
> > >         }
> > >  
> > > @@ -1522,7 +1523,7 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
> > >  
> > >         /* Update the free vmap_area. */
> > >         ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size);
> > > -       if (WARN_ON_ONCE(ret))
> > > +       if (ret)
> > >                 return vend;
> > >  
> > >  #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
> > > @@ -4143,7 +4144,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
> > >                 ret = adjust_va_to_fit_type(&free_vmap_area_root,
> > >                                             &free_vmap_area_list,
> > >                                             va, start, size);
> > > -               if (WARN_ON_ONCE(unlikely(ret)))
> > > +               if (unlikely(ret))
> > >                         /* It is a BUG(), but trigger recovery instead. */
> > >                         goto recovery;
> > >  
> > > <snip>
> > > It will WARN_ONCE_ONCE() only if classify_va_fit_type() is "(type == NOTHING_FIT)".
> > > 
> > This is good but i think it should be improved further. We need to
> > understand from the warning when no memory and when there is no a
> > vmap space, so:
> > 
> > - if NOTHING_FIT, we should WARN() for sure;
> > - Second place in the pcpu_get_vm_area(), we do not use NE_FIT. Only in
> >   the begging after boot, but potentially we can trigger -ENOMEM and we
> >   should warn in this case. Otherwise you just hide it;
> > - And last one if after repeating we still do not manage to allocate.
> > 
> 
> We should understand a reason of failing. I think error handling should
> be improved. Something like:

This looks good to me, though the 'error' parameter looks a little ugly.
How about this?

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ef8599d394fd..32805c82373b 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1454,7 +1454,7 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
 			 */
 			lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
 			if (!lva)
-				return -1;
+				return -ENOMEM;
 		}
 
 		/*
@@ -1468,7 +1468,7 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
 		 */
 		va->va_start = nva_start_addr + size;
 	} else {
-		return -1;
+		return -EINVAL;
 	}
 
 	if (type != FL_FIT_TYPE) {
@@ -1509,7 +1509,7 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
 
 	va = find_vmap_lowest_match(root, size, align, vstart, adjust_search_size);
 	if (unlikely(!va))
-		return vend;
+		return -ENOENT;
 
 	if (va->va_start > vstart)
 		nva_start_addr = ALIGN(va->va_start, align);
@@ -1518,12 +1518,12 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
 
 	/* Check the "vend" restriction. */
 	if (nva_start_addr + size > vend)
-		return vend;
+		return -ERANGE;
 
 	/* Update the free vmap_area. */
 	ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size);
-	if (WARN_ON_ONCE(ret))
-		return vend;
+	if (ret)
+		return ret;
 
 #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
 	find_vmap_lowest_match_check(root, head, size, align);
@@ -1616,13 +1616,13 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 		size, align, vstart, vend);
 	spin_unlock(&free_vmap_area_lock);
 
-	trace_alloc_vmap_area(addr, size, align, vstart, vend, addr == vend);
+	trace_alloc_vmap_area(addr, size, align, vstart, vend, IS_ERR(addr));
 
 	/*
-	 * If an allocation fails, the "vend" address is
+	 * If an allocation fails, the error value is
 	 * returned. Therefore trigger the overflow path.
 	 */
-	if (unlikely(addr == vend))
+	if (unlikely(IS_ERR(addr)))
 		goto overflow;
 
 	va->va_start = addr;
@@ -1662,8 +1662,9 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 	}
 
 	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
-		pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
-			size);
+		pr_warn("vmap allocation for size %lu failed: "
+			"use vmalloc=<size> to increase size, errno: (%d)\n",
+			size, addr);
 
 	kmem_cache_free(vmap_area_cachep, va);
 	return ERR_PTR(-EBUSY);
@@ -4143,8 +4144,8 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 		ret = adjust_va_to_fit_type(&free_vmap_area_root,
 					    &free_vmap_area_list,
 					    va, start, size);
-		if (WARN_ON_ONCE(unlikely(ret)))
-			/* It is a BUG(), but trigger recovery instead. */
+		if ((unlikely(ret)))
+			WARN_ONCE(1, "%s error: errno (%d)\n", __func__, ret);
 			goto recovery;
 
 		/* Allocated area. */

> 
> <snip>
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index ef8599d394fd..03a36921a3fc 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -1454,7 +1454,7 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
>  			 */
>  			lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
>  			if (!lva)
> -				return -1;
> +				return -ENOMEM;
>  		}
>  
>  		/*
> @@ -1468,7 +1468,7 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
>  		 */
>  		va->va_start = nva_start_addr + size;
>  	} else {
> -		return -1;
> +		return -EINVAL;
>  	}
>  
>  	if (type != FL_FIT_TYPE) {
> @@ -1488,7 +1488,8 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
>  static __always_inline unsigned long
>  __alloc_vmap_area(struct rb_root *root, struct list_head *head,
>  	unsigned long size, unsigned long align,
> -	unsigned long vstart, unsigned long vend)
> +	unsigned long vstart, unsigned long vend,
> +	int *error)
>  {
>  	bool adjust_search_size = true;
>  	unsigned long nva_start_addr;
> @@ -1508,8 +1509,10 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
>  		adjust_search_size = false;
>  
>  	va = find_vmap_lowest_match(root, size, align, vstart, adjust_search_size);
> -	if (unlikely(!va))
> +	if (unlikely(!va)) {
> +		*error = -ENOENT;
>  		return vend;
> +	}
>  
>  	if (va->va_start > vstart)
>  		nva_start_addr = ALIGN(va->va_start, align);
> @@ -1517,13 +1520,17 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
>  		nva_start_addr = ALIGN(vstart, align);
>  
>  	/* Check the "vend" restriction. */
> -	if (nva_start_addr + size > vend)
> +	if (nva_start_addr + size > vend) {
> +		*error = -ERANGE;
>  		return vend;
> +	}
>  
>  	/* Update the free vmap_area. */
>  	ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size);
> -	if (WARN_ON_ONCE(ret))
> +	if (ret) {
> +		*error = ret;
>  		return vend;
> +	}
>  
>  #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
>  	find_vmap_lowest_match_check(root, head, size, align);
> @@ -1589,7 +1596,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
>  	unsigned long freed;
>  	unsigned long addr;
>  	int purged = 0;
> -	int ret;
> +	int ret, error;
>  
>  	if (unlikely(!size || offset_in_page(size) || !is_power_of_2(align)))
>  		return ERR_PTR(-EINVAL);
> @@ -1613,7 +1620,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
>  retry:
>  	preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node);
>  	addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
> -		size, align, vstart, vend);
> +		size, align, vstart, vend, &error);
>  	spin_unlock(&free_vmap_area_lock);
>  
>  	trace_alloc_vmap_area(addr, size, align, vstart, vend, addr == vend);
> @@ -1662,8 +1669,9 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
>  	}
>  
>  	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
> -		pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
> -			size);
> +		pr_warn("vmap allocation for size %lu failed: "
> +			"use vmalloc=<size> to increase size, errno: (%d)\n",
> +			size, error);
>  
>  	kmem_cache_free(vmap_area_cachep, va);
>  	return ERR_PTR(-EBUSY);
> @@ -4143,9 +4151,10 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
>  		ret = adjust_va_to_fit_type(&free_vmap_area_root,
>  					    &free_vmap_area_list,
>  					    va, start, size);
> -		if (WARN_ON_ONCE(unlikely(ret)))
> -			/* It is a BUG(), but trigger recovery instead. */
> +		if (unlikely(ret)) {
> +			WARN_ONCE(1, "%s error: errno (%d)\n", __func__, ret);
>  			goto recovery;
> +		}
>  
>  		/* Allocated area. */
>  		va = vas[area];
> <snip>
> 
> Any thoughts?
Uladzislau Rezki Sept. 27, 2023, 3:25 p.m. UTC | #12
On Wed, Sep 27, 2023 at 09:33:04PM +0800, bhe@redhat.com wrote:
> On 09/27/23 at 01:49pm, Uladzislau Rezki wrote:
> > > > Yes, but GFP_NOWAIT-alloc-error can easily occur for low-memory device.
> > > >
> > > Agree. You are really in a low memory condition. We end up here only if
> > > pre-loading also has not succeeded, i.e. GFP_KERNEL also fails.
> > > 
> > > But i agree with you, we should "improve the warning" because we drain
> > > and repeat.
> > > 
> > > > How about changing fix as below?:
> > > > 
> > > > <snip>
> > > > --- a/mm/vmalloc.c
> > > > +++ b/mm/vmalloc.c
> > > > @@ -1468,6 +1468,7 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
> > > >                  */
> > > >                 va->va_start = nva_start_addr + size;
> > > >         } else {
> > > > +               WARN_ON_ONCE(1);
> > > >                 return -1;
> > > >         }
> > > >  
> > > > @@ -1522,7 +1523,7 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
> > > >  
> > > >         /* Update the free vmap_area. */
> > > >         ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size);
> > > > -       if (WARN_ON_ONCE(ret))
> > > > +       if (ret)
> > > >                 return vend;
> > > >  
> > > >  #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
> > > > @@ -4143,7 +4144,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
> > > >                 ret = adjust_va_to_fit_type(&free_vmap_area_root,
> > > >                                             &free_vmap_area_list,
> > > >                                             va, start, size);
> > > > -               if (WARN_ON_ONCE(unlikely(ret)))
> > > > +               if (unlikely(ret))
> > > >                         /* It is a BUG(), but trigger recovery instead. */
> > > >                         goto recovery;
> > > >  
> > > > <snip>
> > > > It will WARN_ONCE_ONCE() only if classify_va_fit_type() is "(type == NOTHING_FIT)".
> > > > 
> > > This is good but i think it should be improved further. We need to
> > > understand from the warning when no memory and when there is no a
> > > vmap space, so:
> > > 
> > > - if NOTHING_FIT, we should WARN() for sure;
> > > - Second place in the pcpu_get_vm_area(), we do not use NE_FIT. Only in
> > >   the begging after boot, but potentially we can trigger -ENOMEM and we
> > >   should warn in this case. Otherwise you just hide it;
> > > - And last one if after repeating we still do not manage to allocate.
> > > 
> > 
> > We should understand a reason of failing. I think error handling should
> > be improved. Something like:
> 
> This looks good to me, while the parameter 'error' looks a little ugly.
> How about this?
> 
Agree. Embedding an error into "addr" is a much better way of handling
it, so we do not need extra storage for errors. This is good :)
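
A minimal illustration of that idea (hypothetical helper name, not the
mm/vmalloc.c code): the errno is folded into the returned unsigned long and
the caller tests it with IS_ERR_VALUE() from <linux/err.h>:

<snip>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/printk.h>

/* Hypothetical helper: returns a start address, or a negated errno. */
static unsigned long try_alloc_range(unsigned long size)
{
	if (!size)
		return -EINVAL;	/* error encoded in the value itself */

	return 0x1000UL;	/* pretend this is a valid start address */
}

	...
	unsigned long addr = try_alloc_range(0);

	if (IS_ERR_VALUE(addr))
		pr_warn("allocation failed, errno: (%ld)\n", (long)addr);
<snip>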

Jaeseon Sim - do you agree with it? If so, please re-spin the patch;
if you do not want to, please let us know.

> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index ef8599d394fd..32805c82373b 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -1454,7 +1454,7 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
>  			 */
>  			lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
>  			if (!lva)
> -				return -1;
> +				return -ENOMEM;
>  		}
>  
>  		/*
> @@ -1468,7 +1468,7 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
>  		 */
>  		va->va_start = nva_start_addr + size;
>  	} else {
> -		return -1;
> +		return -EINVAL;
>  	}
>  
>  	if (type != FL_FIT_TYPE) {
> @@ -1509,7 +1509,7 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
>  
>  	va = find_vmap_lowest_match(root, size, align, vstart, adjust_search_size);
>  	if (unlikely(!va))
> -		return vend;
> +		return -ENOENT;
>  
>  	if (va->va_start > vstart)
>  		nva_start_addr = ALIGN(va->va_start, align);
> @@ -1518,12 +1518,12 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
>  
>  	/* Check the "vend" restriction. */
>  	if (nva_start_addr + size > vend)
> -		return vend;
> +		return -ERANGE;
>  
>  	/* Update the free vmap_area. */
>  	ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size);
> -	if (WARN_ON_ONCE(ret))
> -		return vend;
> +	if (ret)
> +		return ret;
>  
>  #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
>  	find_vmap_lowest_match_check(root, head, size, align);
> @@ -1616,13 +1616,13 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
>  		size, align, vstart, vend);
>  	spin_unlock(&free_vmap_area_lock);
>  
> -	trace_alloc_vmap_area(addr, size, align, vstart, vend, addr == vend);
> +	trace_alloc_vmap_area(addr, size, align, vstart, vend, IS_ERR(addr));
>  
>  	/*
> -	 * If an allocation fails, the "vend" address is
> +	 * If an allocation fails, the error value is
>  	 * returned. Therefore trigger the overflow path.
>  	 */
> -	if (unlikely(addr == vend))
> +	if (unlikely(IS_ERR(addr)))
>  		goto overflow;
>  
>  	va->va_start = addr;
> @@ -1662,8 +1662,9 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
>  	}
>  
>  	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
> -		pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
> -			size);
> +		pr_warn("vmap allocation for size %lu failed: "
> +			"use vmalloc=<size> to increase size, errno: (%d)\n",
> +			size, addr);
>  
>  	kmem_cache_free(vmap_area_cachep, va);
>  	return ERR_PTR(-EBUSY);
> @@ -4143,8 +4144,8 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
>  		ret = adjust_va_to_fit_type(&free_vmap_area_root,
>  					    &free_vmap_area_list,
>  					    va, start, size);
> -		if (WARN_ON_ONCE(unlikely(ret)))
> -			/* It is a BUG(), but trigger recovery instead. */
> +		if ((unlikely(ret)))
> +			WARN_ONCE(1, "%s error: errno (%d)\n", __func__, ret);
>  			goto recovery;
>  
>  		/* Allocated area. */
> 
> > 
> > <snip>
> > diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> > index ef8599d394fd..03a36921a3fc 100644
> > --- a/mm/vmalloc.c
> > +++ b/mm/vmalloc.c
> > @@ -1454,7 +1454,7 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
> >  			 */
> >  			lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
> >  			if (!lva)
> > -				return -1;
> > +				return -ENOMEM;
> >  		}
> >  
> >  		/*
> > @@ -1468,7 +1468,7 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
> >  		 */
> >  		va->va_start = nva_start_addr + size;
> >  	} else {
> > -		return -1;
> > +		return -EINVAL;
> >  	}
> >  
> >  	if (type != FL_FIT_TYPE) {
> > @@ -1488,7 +1488,8 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
> >  static __always_inline unsigned long
> >  __alloc_vmap_area(struct rb_root *root, struct list_head *head,
> >  	unsigned long size, unsigned long align,
> > -	unsigned long vstart, unsigned long vend)
> > +	unsigned long vstart, unsigned long vend,
> > +	int *error)
> >  {
> >  	bool adjust_search_size = true;
> >  	unsigned long nva_start_addr;
> > @@ -1508,8 +1509,10 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
> >  		adjust_search_size = false;
> >  
> >  	va = find_vmap_lowest_match(root, size, align, vstart, adjust_search_size);
> > -	if (unlikely(!va))
> > +	if (unlikely(!va)) {
> > +		*error = -ENOENT;
> >  		return vend;
> > +	}
> >  
> >  	if (va->va_start > vstart)
> >  		nva_start_addr = ALIGN(va->va_start, align);
> > @@ -1517,13 +1520,17 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
> >  		nva_start_addr = ALIGN(vstart, align);
> >  
> >  	/* Check the "vend" restriction. */
> > -	if (nva_start_addr + size > vend)
> > +	if (nva_start_addr + size > vend) {
> > +		*error = -ERANGE;
> >  		return vend;
> > +	}
> >  
> >  	/* Update the free vmap_area. */
> >  	ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size);
> > -	if (WARN_ON_ONCE(ret))
> > +	if (ret) {
> > +		*error = ret;
> >  		return vend;
> > +	}
> >  
> >  #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
> >  	find_vmap_lowest_match_check(root, head, size, align);
> > @@ -1589,7 +1596,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
> >  	unsigned long freed;
> >  	unsigned long addr;
> >  	int purged = 0;
> > -	int ret;
> > +	int ret, error;
> >  
> >  	if (unlikely(!size || offset_in_page(size) || !is_power_of_2(align)))
> >  		return ERR_PTR(-EINVAL);
> > @@ -1613,7 +1620,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
> >  retry:
> >  	preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node);
> >  	addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
> > -		size, align, vstart, vend);
> > +		size, align, vstart, vend, &error);
> >  	spin_unlock(&free_vmap_area_lock);
> >  
> >  	trace_alloc_vmap_area(addr, size, align, vstart, vend, addr == vend);
> > @@ -1662,8 +1669,9 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
> >  	}
> >  
> >  	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
> > -		pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
> > -			size);
> > +		pr_warn("vmap allocation for size %lu failed: "
> > +			"use vmalloc=<size> to increase size, errno: (%d)\n",
> > +			size, error);
> >  
> >  	kmem_cache_free(vmap_area_cachep, va);
> >  	return ERR_PTR(-EBUSY);
> > @@ -4143,9 +4151,10 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
> >  		ret = adjust_va_to_fit_type(&free_vmap_area_root,
> >  					    &free_vmap_area_list,
> >  					    va, start, size);
> > -		if (WARN_ON_ONCE(unlikely(ret)))
> > -			/* It is a BUG(), but trigger recovery instead. */
> > +		if (unlikely(ret)) {
> > +			WARN_ONCE(1, "%s error: errno (%d)\n", __func__, ret);
> >  			goto recovery;
> > +		}
> >  
> >  		/* Allocated area. */
> >  		va = vas[area];
> > <snip>
> > 
> > Any thoughts?
> 

--
Uladzislau Rezki

Patch

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ef8599d394fd..4a82b6525d48 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1522,7 +1522,7 @@  __alloc_vmap_area(struct rb_root *root, struct list_head *head,
 
         /* Update the free vmap_area. */
         ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size);
-        if (WARN_ON_ONCE(ret))
+        if (ret)
                 return vend;
 
 #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
@@ -4143,7 +4143,7 @@  struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
                 ret = adjust_va_to_fit_type(&free_vmap_area_root,
                                             &free_vmap_area_list,
                                             va, start, size);
-                if (WARN_ON_ONCE(unlikely(ret)))
+                if (unlikely(ret))
                         /* It is a BUG(), but trigger recovery instead. */
                         goto recovery;