diff mbox series

kasan,kmsan: remove __GFP_KSWAPD_RECLAIM usage from kasan/kmsan

Message ID 656cb4f5-998b-c8d7-3c61-c2d37aa90f9a@I-love.SAKURA.ne.jp (mailing list archive)
State New
Headers show
Series kasan,kmsan: remove __GFP_KSWAPD_RECLAIM usage from kasan/kmsan | expand

Commit Message

Tetsuo Handa May 27, 2023, 3:25 p.m. UTC
syzbot is reporting lockdep warning in __stack_depot_save(), for
the caller of __stack_depot_save() (i.e. __kasan_record_aux_stack() in
this report) is responsible for masking __GFP_KSWAPD_RECLAIM flag in
order not to wake kswapd which in turn wakes kcompactd.

Since kasan/kmsan functions might be called with arbitrary locks held,
mask __GFP_KSWAPD_RECLAIM flag from all GFP_NOWAIT/GFP_ATOMIC allocations
in kasan/kmsan.

Note that kmsan_save_stack_with_flags() is changed to mask both
__GFP_DIRECT_RECLAIM flag and __GFP_KSWAPD_RECLAIM flag, for
wakeup_kswapd() from wake_all_kswapds() from __alloc_pages_slowpath()
calls wakeup_kcompactd() if __GFP_KSWAPD_RECLAIM flag is set and
__GFP_DIRECT_RECLAIM flag is not set.

Reported-by: syzbot <syzbot+ece2915262061d6e0ac1@syzkaller.appspotmail.com>
Closes: https://syzkaller.appspot.com/bug?extid=ece2915262061d6e0ac1
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
 mm/kasan/generic.c         | 4 ++--
 mm/kasan/tags.c            | 2 +-
 mm/kmsan/core.c            | 6 +++---
 mm/kmsan/instrumentation.c | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

Comments

Huang, Ying May 29, 2023, 1:07 a.m. UTC | #1
Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> writes:

> syzbot is reporting lockdep warning in __stack_depot_save(), for
> the caller of __stack_depot_save() (i.e. __kasan_record_aux_stack() in
> this report) is responsible for masking __GFP_KSWAPD_RECLAIM flag in
> order not to wake kswapd which in turn wakes kcompactd.
>
> Since kasan/kmsan functions might be called with arbitrary locks held,
> mask __GFP_KSWAPD_RECLAIM flag from all GFP_NOWAIT/GFP_ATOMIC allocations
> in kasan/kmsan.
>
> Note that kmsan_save_stack_with_flags() is changed to mask both
> __GFP_DIRECT_RECLAIM flag and __GFP_KSWAPD_RECLAIM flag, for
> wakeup_kswapd() from wake_all_kswapds() from __alloc_pages_slowpath()
> calls wakeup_kcompactd() if __GFP_KSWAPD_RECLAIM flag is set and
> __GFP_DIRECT_RECLAIM flag is not set.
>
> Reported-by: syzbot <syzbot+ece2915262061d6e0ac1@syzkaller.appspotmail.com>
> Closes: https://syzkaller.appspot.com/bug?extid=ece2915262061d6e0ac1
> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>

This looks good to me.  Thanks!

Reviewed-by: "Huang, Ying" <ying.huang@intel.com>

> ---
>  mm/kasan/generic.c         | 4 ++--
>  mm/kasan/tags.c            | 2 +-
>  mm/kmsan/core.c            | 6 +++---
>  mm/kmsan/instrumentation.c | 2 +-
>  4 files changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
> index e5eef670735e..2c94f4943240 100644
> --- a/mm/kasan/generic.c
> +++ b/mm/kasan/generic.c
> @@ -488,7 +488,7 @@ static void __kasan_record_aux_stack(void *addr, bool can_alloc)
>  		return;
>  
>  	alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0];
> -	alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT, can_alloc);
> +	alloc_meta->aux_stack[0] = kasan_save_stack(0, can_alloc);
>  }
>  
>  void kasan_record_aux_stack(void *addr)
> @@ -518,7 +518,7 @@ void kasan_save_free_info(struct kmem_cache *cache, void *object)
>  	if (!free_meta)
>  		return;
>  
> -	kasan_set_track(&free_meta->free_track, GFP_NOWAIT);
> +	kasan_set_track(&free_meta->free_track, 0);
>  	/* The object was freed and has free track set. */
>  	*(u8 *)kasan_mem_to_shadow(object) = KASAN_SLAB_FREETRACK;
>  }
> diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c
> index 67a222586846..7dcfe341d48e 100644
> --- a/mm/kasan/tags.c
> +++ b/mm/kasan/tags.c
> @@ -140,5 +140,5 @@ void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags)
>  
>  void kasan_save_free_info(struct kmem_cache *cache, void *object)
>  {
> -	save_stack_info(cache, object, GFP_NOWAIT, true);
> +	save_stack_info(cache, object, 0, true);
>  }
> diff --git a/mm/kmsan/core.c b/mm/kmsan/core.c
> index 7d1e4aa30bae..3adb4c1d3b19 100644
> --- a/mm/kmsan/core.c
> +++ b/mm/kmsan/core.c
> @@ -74,7 +74,7 @@ depot_stack_handle_t kmsan_save_stack_with_flags(gfp_t flags,
>  	nr_entries = stack_trace_save(entries, KMSAN_STACK_DEPTH, 0);
>  
>  	/* Don't sleep. */
> -	flags &= ~__GFP_DIRECT_RECLAIM;
> +	flags &= ~(__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM);
>  
>  	handle = __stack_depot_save(entries, nr_entries, flags, true);
>  	return stack_depot_set_extra_bits(handle, extra);
> @@ -245,7 +245,7 @@ depot_stack_handle_t kmsan_internal_chain_origin(depot_stack_handle_t id)
>  	extra_bits = kmsan_extra_bits(depth, uaf);
>  
>  	entries[0] = KMSAN_CHAIN_MAGIC_ORIGIN;
> -	entries[1] = kmsan_save_stack_with_flags(GFP_ATOMIC, 0);
> +	entries[1] = kmsan_save_stack_with_flags(__GFP_HIGH, 0);
>  	entries[2] = id;
>  	/*
>  	 * @entries is a local var in non-instrumented code, so KMSAN does not
> @@ -253,7 +253,7 @@ depot_stack_handle_t kmsan_internal_chain_origin(depot_stack_handle_t id)
>  	 * positives when __stack_depot_save() passes it to instrumented code.
>  	 */
>  	kmsan_internal_unpoison_memory(entries, sizeof(entries), false);
> -	handle = __stack_depot_save(entries, ARRAY_SIZE(entries), GFP_ATOMIC,
> +	handle = __stack_depot_save(entries, ARRAY_SIZE(entries), __GFP_HIGH,
>  				    true);
>  	return stack_depot_set_extra_bits(handle, extra_bits);
>  }
> diff --git a/mm/kmsan/instrumentation.c b/mm/kmsan/instrumentation.c
> index cf12e9616b24..cc3907a9c33a 100644
> --- a/mm/kmsan/instrumentation.c
> +++ b/mm/kmsan/instrumentation.c
> @@ -282,7 +282,7 @@ void __msan_poison_alloca(void *address, uintptr_t size, char *descr)
>  
>  	/* stack_depot_save() may allocate memory. */
>  	kmsan_enter_runtime();
> -	handle = stack_depot_save(entries, ARRAY_SIZE(entries), GFP_ATOMIC);
> +	handle = stack_depot_save(entries, ARRAY_SIZE(entries), __GFP_HIGH);
>  	kmsan_leave_runtime();
>  
>  	kmsan_internal_set_shadow_origin(address, size, -1, handle,
Alexander Potapenko May 31, 2023, 1:31 p.m. UTC | #2
On Mon, May 29, 2023 at 3:08 AM Huang, Ying <ying.huang@intel.com> wrote:
>
> Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> writes:
>
> > syzbot is reporting lockdep warning in __stack_depot_save(), for
> > the caller of __stack_depot_save() (i.e. __kasan_record_aux_stack() in
> > this report) is responsible for masking __GFP_KSWAPD_RECLAIM flag in
> > order not to wake kswapd which in turn wakes kcompactd.
> >
> > Since kasan/kmsan functions might be called with arbitrary locks held,
> > mask __GFP_KSWAPD_RECLAIM flag from all GFP_NOWAIT/GFP_ATOMIC allocations
> > in kasan/kmsan.
> >
> > Note that kmsan_save_stack_with_flags() is changed to mask both
> > __GFP_DIRECT_RECLAIM flag and __GFP_KSWAPD_RECLAIM flag, for
> > wakeup_kswapd() from wake_all_kswapds() from __alloc_pages_slowpath()
> > calls wakeup_kcompactd() if __GFP_KSWAPD_RECLAIM flag is set and
> > __GFP_DIRECT_RECLAIM flag is not set.
> >
> > Reported-by: syzbot <syzbot+ece2915262061d6e0ac1@syzkaller.appspotmail.com>
> > Closes: https://syzkaller.appspot.com/bug?extid=ece2915262061d6e0ac1
> > Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
>
> This looks good to me.  Thanks!
>
> Reviewed-by: "Huang, Ying" <ying.huang@intel.com>

Sorry for the late reply, but maybe it would be better to mask this
flag in __stack_depot_save() (lib/stackdepot.c) instead?
We are already masking out a number of flags there, and the problem
seems quite generic.
Andrew Morton June 9, 2023, 10:31 p.m. UTC | #3
On Wed, 31 May 2023 15:31:53 +0200 Alexander Potapenko <glider@google.com> wrote:

> On Mon, May 29, 2023 at 3:08 AM Huang, Ying <ying.huang@intel.com> wrote:
> >
> > ? Handa <penguin-kernel@I-love.SAKURA.ne.jp> writes:
> >
> > > syzbot is reporting lockdep warning in __stack_depot_save(), for
> > > the caller of __stack_depot_save() (i.e. __kasan_record_aux_stack() in
> > > this report) is responsible for masking __GFP_KSWAPD_RECLAIM flag in
> > > order not to wake kswapd which in turn wakes kcompactd.
> > >
> > > Since kasan/kmsan functions might be called with arbitrary locks held,
> > > mask __GFP_KSWAPD_RECLAIM flag from all GFP_NOWAIT/GFP_ATOMIC allocations
> > > in kasan/kmsan.
> > >
> > > Note that kmsan_save_stack_with_flags() is changed to mask both
> > > __GFP_DIRECT_RECLAIM flag and __GFP_KSWAPD_RECLAIM flag, for
> > > wakeup_kswapd() from wake_all_kswapds() from __alloc_pages_slowpath()
> > > calls wakeup_kcompactd() if __GFP_KSWAPD_RECLAIM flag is set and
> > > __GFP_DIRECT_RECLAIM flag is not set.
> > >
> > > Reported-by: syzbot <syzbot+ece2915262061d6e0ac1@syzkaller.appspotmail.com>
> > > Closes: https://syzkaller.appspot.com/bug?extid=ece2915262061d6e0ac1
> > > Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> >
> > This looks good to me.  Thanks!
> >
> > Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
> 
> Sorry for the late reply, but maybe it would be better to mask this
> flag in __stack_depot_save() (lib/stackdepot.c) instead?
> We are already masking out a number of flags there, and the problem
> seems quite generic.


Tetsuo?
Huang, Ying June 12, 2023, 1:30 a.m. UTC | #4
Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> writes:

> syzbot is reporting lockdep warning in __stack_depot_save(), for
> __kasan_record_aux_stack() is passing GFP_NOWAIT which will result in
> calling wakeup_kcompactd() from wakeup_kswapd() from wake_all_kswapds()
>  from __alloc_pages_slowpath().
>
> Strictly speaking, __kasan_record_aux_stack() is responsible for removing
> __GFP_KSWAPD_RECLAIM flag in order not to wake kswapd which in turn wakes
> kcompactd. But since KASAN and KMSAN functions might be called with
> arbitrary locks held, we should consider removing __GFP_KSWAPD_RECLAIM
> flag from KASAN and KMSAN. And this patch goes one step further; let's
> remove __GFP_KSWAPD_RECLAIM flag in the __stack_depot_save() side, based
> on the following reasons.
>
> Reason 1:
>
>   Currently, __stack_depot_save() has "alloc_flags &= ~GFP_ZONEMASK;" line
>   which is pointless because "alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);"
>   line will also zero out zone modifiers. But why is __stack_depot_save()
>   trying to mask gfp flags supplied by the caller?
>
>   I guess that __stack_depot_save() tried to be as robust as possible. But
>   __stack_depot_save() is a debugging function where all callers have to
>   be able to survive allocation failures. Scattering low-level gfp flags
>   like 0 or __GFP_HIGH should be avoided in order to replace GFP_NOWAIT or
>   GFP_ATOMIC.
>
> Reason 2:
>
>   __stack_depot_save() from stack_depot_save() is also called by
>   ref_tracker_alloc() from __netns_tracker_alloc() from
>   netns_tracker_alloc() from get_net_track(), and some of get_net_track()
>   users are passing GFP_ATOMIC because waking kswapd/kcompactd is safe.
>   But even if we mask __GFP_KSWAPD_RECLAIM flag at __stack_depot_save(),
>   it is very likely that allocations with __GFP_KSWAPD_RECLAIM flag happen
>   somewhere else by the moment __stack_depot_save() is called for the next
>   time.
>
>   Therefore, not waking kswapd/kcompactd when doing allocation for
>   __stack_depot_save() will be acceptable from the memory reclaim latency
>   perspective.

TBH, I don't like to remove __GFP_KSWAPD_RECLAIM flag unnecessarily.
But this is only my personal opinion.

> While we are at it, let's make __stack_depot_save() accept __GFP_NORETRY
> and __GFP_RETRY_MAYFAIL flags, based on the following reason.
>
> Reason 3:
>
>   Since DEPOT_POOL_ORDER is defined as 2, we must mask __GFP_NOFAIL flag
>   in order not to complain rmqueue(). But masking __GFP_NORETRY flag and
>   __GFP_RETRY_MAYFAIL flag might be overkill.
>
>   The OOM killer might be needlessly invoked due to order-2 allocation if
>   GFP_KERNEL is supplied by the caller, despite the caller might have
>   passed GFP_KERNEL for doing order-0 allocation.
>
>   Allocation for order-2 might stall if GFP_NOFS or GFP_NOIO is supplied
>   by the caller, despite the caller might have passed GFP_NOFS or GFP_NOIO
>   for doing order-0 allocation.
>
>   Generally speaking, I feel that doing order-2 allocation from
>   __stack_depot_save() with gfp flags supplied by the caller is an
>   unexpected behavior for the callers. We might want to use only order-0
>   allocation, and/or stop using gfp flags supplied by the caller...

Per my understanding, this isn't locking issue reported by syzbot?  If
so, I suggest to put this in a separate patch.

> Reported-by: syzbot <syzbot+ece2915262061d6e0ac1@syzkaller.appspotmail.com>
> Closes: https://syzkaller.appspot.com/bug?extid=ece2915262061d6e0ac1
> Suggested-by: Alexander Potapenko <glider@google.com>
> Cc: Huang, Ying <ying.huang@intel.com>
> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> ---
> Changes in v3:
>   Huang, Ying thinks that masking __GFP_KSWAPD_RECLAIM flag in the callers
>   side is preferable
>   ( https://lkml.kernel.org/r/87fs7nyhs3.fsf@yhuang6-desk2.ccr.corp.intel.com ).
>   But Alexander Potapenko thinks that masking __GFP_KSWAPD_RECLAIM flag
>   in the callee side would be the better
>   ( https://lkml.kernel.org/r/CAG_fn=UTTbkGeOX0teGcNOeobtgV=mfGOefZpV-NTN4Ouus7xA@mail.gmail.com ).
>   I took Alexander's suggestion, and added reasoning for masking
>   __GFP_KSWAPD_RECLAIM flag in the callee side.
>
> Changes in v2:
>   Mask __GFP_KSWAPD_RECLAIM flag in the callers, suggested by Huang, Ying
>   ( https://lkml.kernel.org/r/87edn92jvz.fsf@yhuang6-desk2.ccr.corp.intel.com ).
>
>  lib/stackdepot.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/lib/stackdepot.c b/lib/stackdepot.c
> index 2f5aa851834e..33ebefaa7074 100644
> --- a/lib/stackdepot.c
> +++ b/lib/stackdepot.c
> @@ -405,7 +405,10 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
>  		 * contexts and I/O.
>  		 */
>  		alloc_flags &= ~GFP_ZONEMASK;
> -		alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
> +		if (!(alloc_flags & __GFP_DIRECT_RECLAIM))
> +			alloc_flags &= __GFP_HIGH;

Why not just

                        alloc_flags &= ~__GFP_KSWAPD_RECLAIM;

?

> +		else
> +			alloc_flags &= ~__GFP_NOFAIL;
>  		alloc_flags |= __GFP_NOWARN;
>  		page = alloc_pages(alloc_flags, DEPOT_POOL_ORDER);
>  		if (page)

Best Regards,
Huang, Ying
Alexander Potapenko June 21, 2023, 12:56 p.m. UTC | #5
On Sat, Jun 10, 2023 at 1:40 PM Tetsuo Handa
<penguin-kernel@i-love.sakura.ne.jp> wrote:
>
> syzbot is reporting lockdep warning in __stack_depot_save(), for
> __kasan_record_aux_stack() is passing GFP_NOWAIT which will result in
> calling wakeup_kcompactd() from wakeup_kswapd() from wake_all_kswapds()
>  from __alloc_pages_slowpath().
>
> Strictly speaking, __kasan_record_aux_stack() is responsible for removing
> __GFP_KSWAPD_RECLAIM flag in order not to wake kswapd which in turn wakes
> kcompactd. But since KASAN and KMSAN functions might be called with
> arbitrary locks held, we should consider removing __GFP_KSWAPD_RECLAIM
> flag from KASAN and KMSAN. And this patch goes one step further; let's
> remove __GFP_KSWAPD_RECLAIM flag in the __stack_depot_save() side, based
> on the following reasons.
>
> Reason 1:
>
>   Currently, __stack_depot_save() has "alloc_flags &= ~GFP_ZONEMASK;" line
>   which is pointless because "alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);"
>   line will also zero out zone modifiers.

Good catch, we indeed do not need the GFP_ZONEMASK line now.
But looks like you'll need it at least in the __GFP_NOFAIL branch?

> But why is __stack_depot_save()
>   trying to mask gfp flags supplied by the caller?
>
>   I guess that __stack_depot_save() tried to be as robust as possible. But
>   __stack_depot_save() is a debugging function where all callers have to
>   be able to survive allocation failures.

This, but also the allocation should not deadlock.
E.g. KMSAN can call __stack_depot_save() from almost any function in
the kernel, so we'd better avoid heavyweight memory reclaiming,
because that in turn may call __stack_depot_save() again.

>
> Reason 2:
>
>   __stack_depot_save() from stack_depot_save() is also called by
>   ref_tracker_alloc() from __netns_tracker_alloc() from
>   netns_tracker_alloc() from get_net_track(), and some of get_net_track()
>   users are passing GFP_ATOMIC because waking kswapd/kcompactd is safe.
>   But even if we mask __GFP_KSWAPD_RECLAIM flag at __stack_depot_save(),
>   it is very likely that allocations with __GFP_KSWAPD_RECLAIM flag happen
>   somewhere else by the moment __stack_depot_save() is called for the next
>   time.
>
>   Therefore, not waking kswapd/kcompactd when doing allocation for
>   __stack_depot_save() will be acceptable from the memory reclaim latency
>   perspective.

Ack.

> While we are at it, let's make __stack_depot_save() accept __GFP_NORETRY
> and __GFP_RETRY_MAYFAIL flags, based on the following reason.

Looks like you're accepting a whole bunch of flags in addition to
__GFP_NORETRY and __GFP_RETRY_MAYFAIL - maybe list the two explicitly?

> Reason 3:
>
>   Since DEPOT_POOL_ORDER is defined as 2, we must mask __GFP_NOFAIL flag
>   in order not to complain rmqueue(). But masking __GFP_NORETRY flag and
>   __GFP_RETRY_MAYFAIL flag might be overkill.
>
>   The OOM killer might be needlessly invoked due to order-2 allocation if
>   GFP_KERNEL is supplied by the caller, despite the caller might have
>   passed GFP_KERNEL for doing order-0 allocation.

As you noted above, stackdepot is a debug feature anyway, so invoking
OOM killer because there is no memory for an order-2 allocation might
be an acceptable behavior?

>   Allocation for order-2 might stall if GFP_NOFS or GFP_NOIO is supplied
>   by the caller, despite the caller might have passed GFP_NOFS or GFP_NOIO
>   for doing order-0 allocation.

What if the caller passed GFP_NOFS to avoid calling back into FS, and
discarding that flag would result in a recursion?
Same for GFP_NOIO.

>   Generally speaking, I feel that doing order-2 allocation from
>   __stack_depot_save() with gfp flags supplied by the caller is an
>   unexpected behavior for the callers. We might want to use only order-0
>   allocation, and/or stop using gfp flags supplied by the caller...

Right now stackdepot allows the following list of flags: __GFP_HIGH,
__GFP_KSWAPD_RECLAIM, __GFP_DIRECT_RECLAIM, __GFP_IO, __GFP_FS.
We could restrict it further to __GFP_HIGH | __GFP_DIRECT_RECLAIM to
be on the safe side - plus allow __GFP_NORETRY and
__GFP_RETRY_MAYFAIL.



> Reported-by: syzbot <syzbot+ece2915262061d6e0ac1@syzkaller.appspotmail.com>
> Closes: https://syzkaller.appspot.com/bug?extid=ece2915262061d6e0ac1
> Suggested-by: Alexander Potapenko <glider@google.com>
> Cc: Huang, Ying <ying.huang@intel.com>
> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> ---
> Changes in v3:
>   Huang, Ying thinks that masking __GFP_KSWAPD_RECLAIM flag in the callers
>   side is preferable
>   ( https://lkml.kernel.org/r/87fs7nyhs3.fsf@yhuang6-desk2.ccr.corp.intel.com ).
>   But Alexander Potapenko thinks that masking __GFP_KSWAPD_RECLAIM flag
>   in the callee side would be the better
>   ( https://lkml.kernel.org/r/CAG_fn=UTTbkGeOX0teGcNOeobtgV=mfGOefZpV-NTN4Ouus7xA@mail.gmail.com ).
>   I took Alexander's suggestion, and added reasoning for masking
>   __GFP_KSWAPD_RECLAIM flag in the callee side.
>
> Changes in v2:
>   Mask __GFP_KSWAPD_RECLAIM flag in the callers, suggested by Huang, Ying
>   ( https://lkml.kernel.org/r/87edn92jvz.fsf@yhuang6-desk2.ccr.corp.intel.com ).
>
>  lib/stackdepot.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/lib/stackdepot.c b/lib/stackdepot.c
> index 2f5aa851834e..33ebefaa7074 100644
> --- a/lib/stackdepot.c
> +++ b/lib/stackdepot.c
> @@ -405,7 +405,10 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
>                  * contexts and I/O.
>                  */
>                 alloc_flags &= ~GFP_ZONEMASK;
> -               alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
> +               if (!(alloc_flags & __GFP_DIRECT_RECLAIM))
> +                       alloc_flags &= __GFP_HIGH;
> +               else
> +                       alloc_flags &= ~__GFP_NOFAIL;
>                 alloc_flags |= __GFP_NOWARN;
>                 page = alloc_pages(alloc_flags, DEPOT_POOL_ORDER);
>                 if (page)
> --
> 2.18.4
>
>
Tetsuo Handa June 21, 2023, 2:07 p.m. UTC | #6
On 2023/06/21 21:56, Alexander Potapenko wrote:
>> But why is __stack_depot_save()
>>   trying to mask gfp flags supplied by the caller?
>>
>>   I guess that __stack_depot_save() tried to be as robust as possible. But
>>   __stack_depot_save() is a debugging function where all callers have to
>>   be able to survive allocation failures.
> 
> This, but also the allocation should not deadlock.
> E.g. KMSAN can call __stack_depot_save() from almost any function in
> the kernel, so we'd better avoid heavyweight memory reclaiming,
> because that in turn may call __stack_depot_save() again.

Then, isn't "[PATCH] kasan,kmsan: remove __GFP_KSWAPD_RECLAIM usage from
kasan/kmsan" the better fix?



>>   Allocation for order-2 might stall if GFP_NOFS or GFP_NOIO is supplied
>>   by the caller, despite the caller might have passed GFP_NOFS or GFP_NOIO
>>   for doing order-0 allocation.
> 
> What if the caller passed GFP_NOFS to avoid calling back into FS, and
> discarding that flag would result in a recursion?
> Same for GFP_NOIO.

Excuse me, but "alloc_flags &= ~__GFP_NOFAIL;" will not discard flags in
GFP_NOFS / GFP_NOIO ?



>>   Generally speaking, I feel that doing order-2 allocation from
>>   __stack_depot_save() with gfp flags supplied by the caller is an
>>   unexpected behavior for the callers. We might want to use only order-0
>>   allocation, and/or stop using gfp flags supplied by the caller...
> 
> Right now stackdepot allows the following list of flags: __GFP_HIGH,
> __GFP_KSWAPD_RECLAIM, __GFP_DIRECT_RECLAIM, __GFP_IO, __GFP_FS.
> We could restrict it further to __GFP_HIGH | __GFP_DIRECT_RECLAIM to
> be on the safe side - plus allow __GFP_NORETRY and
> __GFP_RETRY_MAYFAIL.

I feel that making such change is killing more than needed; there is
no need to discard __GFP_KSWAPD_RECLAIM when GFP_KERNEL is given.

"[PATCH] kasan,kmsan: remove __GFP_KSWAPD_RECLAIM usage from kasan/kmsan"
looks the better.
Alexander Potapenko June 21, 2023, 2:42 p.m. UTC | #7
On Wed, Jun 21, 2023 at 4:07 PM Tetsuo Handa
<penguin-kernel@i-love.sakura.ne.jp> wrote:
>
> On 2023/06/21 21:56, Alexander Potapenko wrote:
> >> But why is __stack_depot_save()
> >>   trying to mask gfp flags supplied by the caller?
> >>
> >>   I guess that __stack_depot_save() tried to be as robust as possible. But
> >>   __stack_depot_save() is a debugging function where all callers have to
> >>   be able to survive allocation failures.
> >
> > This, but also the allocation should not deadlock.
> > E.g. KMSAN can call __stack_depot_save() from almost any function in
> > the kernel, so we'd better avoid heavyweight memory reclaiming,
> > because that in turn may call __stack_depot_save() again.
>
> Then, isn't "[PATCH] kasan,kmsan: remove __GFP_KSWAPD_RECLAIM usage from
> kasan/kmsan" the better fix?

Perhaps you are right and I shouldn't have insisted on pushing this
flag down to stackdepot.
If other users (e.g. page_owner) can afford invoking kswapd, then we
are good to go, and new compiler-based tools can use the same flags
KASAN and KMSAN do.


>
> >>   Allocation for order-2 might stall if GFP_NOFS or GFP_NOIO is supplied
> >>   by the caller, despite the caller might have passed GFP_NOFS or GFP_NOIO
> >>   for doing order-0 allocation.
> >
> > What if the caller passed GFP_NOFS to avoid calling back into FS, and
> > discarding that flag would result in a recursion?
> > Same for GFP_NOIO.
>
> Excuse me, but "alloc_flags &= ~__GFP_NOFAIL;" will not discard flags in
> GFP_NOFS / GFP_NOIO ?

But not for the other if-clause?
Anyway, I actually confused GFP_NOIO (which is technically
__GFP_RECLAIM) and GFP_NOFS with __GFP_IO/__GFP_FS, thinking that
there's a separate pair of GFP flags opposite to __GFP_IO and
__GFP_FS.
Please disregard.

>
>
> >>   Generally speaking, I feel that doing order-2 allocation from
> >>   __stack_depot_save() with gfp flags supplied by the caller is an
> >>   unexpected behavior for the callers. We might want to use only order-0
> >>   allocation, and/or stop using gfp flags supplied by the caller...
> >
> > Right now stackdepot allows the following list of flags: __GFP_HIGH,
> > __GFP_KSWAPD_RECLAIM, __GFP_DIRECT_RECLAIM, __GFP_IO, __GFP_FS.
> > We could restrict it further to __GFP_HIGH | __GFP_DIRECT_RECLAIM to
> > be on the safe side - plus allow __GFP_NORETRY and
> > __GFP_RETRY_MAYFAIL.
>
> I feel that making such change is killing more than needed; there is
> no need to discard __GFP_KSWAPD_RECLAIM when GFP_KERNEL is given.
>
> "[PATCH] kasan,kmsan: remove __GFP_KSWAPD_RECLAIM usage from kasan/kmsan"
> looks the better.
>

I agree, let's go for it.
Sorry for the trouble.
Tetsuo Handa June 21, 2023, 2:54 p.m. UTC | #8
On 2023/06/21 23:42, Alexander Potapenko wrote:
>> "[PATCH] kasan,kmsan: remove __GFP_KSWAPD_RECLAIM usage from kasan/kmsan"
>> looks the better.
>>
> 
> I agree, let's go for it.
> Sorry for the trouble.
> 

No problem. :-)

Andrew, please take "[PATCH] kasan,kmsan: remove __GFP_KSWAPD_RECLAIM usage from kasan/kmsan"
at https://lkml.kernel.org/r/656cb4f5-998b-c8d7-3c61-c2d37aa90f9a@I-love.SAKURA.ne.jp
with "Reviewed-by: "Huang, Ying" <ying.huang@intel.com>" added.
Alexander Potapenko June 21, 2023, 3:37 p.m. UTC | #9
On Sat, Jun 10, 2023 at 12:31 AM Andrew Morton
<akpm@linux-foundation.org> wrote:
>
> On Wed, 31 May 2023 15:31:53 +0200 Alexander Potapenko <glider@google.com> wrote:
>
> > On Mon, May 29, 2023 at 3:08 AM Huang, Ying <ying.huang@intel.com> wrote:
> > >
> > > ? Handa <penguin-kernel@I-love.SAKURA.ne.jp> writes:
> > >
> > > > syzbot is reporting lockdep warning in __stack_depot_save(), for
> > > > the caller of __stack_depot_save() (i.e. __kasan_record_aux_stack() in
> > > > this report) is responsible for masking __GFP_KSWAPD_RECLAIM flag in
> > > > order not to wake kswapd which in turn wakes kcompactd.
> > > >
> > > > Since kasan/kmsan functions might be called with arbitrary locks held,
> > > > mask __GFP_KSWAPD_RECLAIM flag from all GFP_NOWAIT/GFP_ATOMIC allocations
> > > > in kasan/kmsan.
> > > >
> > > > Note that kmsan_save_stack_with_flags() is changed to mask both
> > > > __GFP_DIRECT_RECLAIM flag and __GFP_KSWAPD_RECLAIM flag, for
> > > > wakeup_kswapd() from wake_all_kswapds() from __alloc_pages_slowpath()
> > > > calls wakeup_kcompactd() if __GFP_KSWAPD_RECLAIM flag is set and
> > > > __GFP_DIRECT_RECLAIM flag is not set.
> > > >
> > > > Reported-by: syzbot <syzbot+ece2915262061d6e0ac1@syzkaller.appspotmail.com>
> > > > Closes: https://syzkaller.appspot.com/bug?extid=ece2915262061d6e0ac1
> > > > Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> > >
> > > This looks good to me.  Thanks!
> > >
> > > Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
> >
> > Sorry for the late reply, but maybe it would be better to mask this
> > flag in __stack_depot_save() (lib/stackdepot.c) instead?
> > We are already masking out a number of flags there, and the problem
> > seems quite generic.
>
>
> Tetsuo?

Reviewed-by: Alexander Potapenko <glider@google.com>

Andrew, please accept this patch. As noted in the other thread, no
changes to stackdepot are needed.
diff mbox series

Patch

diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
index e5eef670735e..2c94f4943240 100644
--- a/mm/kasan/generic.c
+++ b/mm/kasan/generic.c
@@ -488,7 +488,7 @@  static void __kasan_record_aux_stack(void *addr, bool can_alloc)
 		return;
 
 	alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0];
-	alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT, can_alloc);
+	alloc_meta->aux_stack[0] = kasan_save_stack(0, can_alloc);
 }
 
 void kasan_record_aux_stack(void *addr)
@@ -518,7 +518,7 @@  void kasan_save_free_info(struct kmem_cache *cache, void *object)
 	if (!free_meta)
 		return;
 
-	kasan_set_track(&free_meta->free_track, GFP_NOWAIT);
+	kasan_set_track(&free_meta->free_track, 0);
 	/* The object was freed and has free track set. */
 	*(u8 *)kasan_mem_to_shadow(object) = KASAN_SLAB_FREETRACK;
 }
diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c
index 67a222586846..7dcfe341d48e 100644
--- a/mm/kasan/tags.c
+++ b/mm/kasan/tags.c
@@ -140,5 +140,5 @@  void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags)
 
 void kasan_save_free_info(struct kmem_cache *cache, void *object)
 {
-	save_stack_info(cache, object, GFP_NOWAIT, true);
+	save_stack_info(cache, object, 0, true);
 }
diff --git a/mm/kmsan/core.c b/mm/kmsan/core.c
index 7d1e4aa30bae..3adb4c1d3b19 100644
--- a/mm/kmsan/core.c
+++ b/mm/kmsan/core.c
@@ -74,7 +74,7 @@  depot_stack_handle_t kmsan_save_stack_with_flags(gfp_t flags,
 	nr_entries = stack_trace_save(entries, KMSAN_STACK_DEPTH, 0);
 
 	/* Don't sleep. */
-	flags &= ~__GFP_DIRECT_RECLAIM;
+	flags &= ~(__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM);
 
 	handle = __stack_depot_save(entries, nr_entries, flags, true);
 	return stack_depot_set_extra_bits(handle, extra);
@@ -245,7 +245,7 @@  depot_stack_handle_t kmsan_internal_chain_origin(depot_stack_handle_t id)
 	extra_bits = kmsan_extra_bits(depth, uaf);
 
 	entries[0] = KMSAN_CHAIN_MAGIC_ORIGIN;
-	entries[1] = kmsan_save_stack_with_flags(GFP_ATOMIC, 0);
+	entries[1] = kmsan_save_stack_with_flags(__GFP_HIGH, 0);
 	entries[2] = id;
 	/*
 	 * @entries is a local var in non-instrumented code, so KMSAN does not
@@ -253,7 +253,7 @@  depot_stack_handle_t kmsan_internal_chain_origin(depot_stack_handle_t id)
 	 * positives when __stack_depot_save() passes it to instrumented code.
 	 */
 	kmsan_internal_unpoison_memory(entries, sizeof(entries), false);
-	handle = __stack_depot_save(entries, ARRAY_SIZE(entries), GFP_ATOMIC,
+	handle = __stack_depot_save(entries, ARRAY_SIZE(entries), __GFP_HIGH,
 				    true);
 	return stack_depot_set_extra_bits(handle, extra_bits);
 }
diff --git a/mm/kmsan/instrumentation.c b/mm/kmsan/instrumentation.c
index cf12e9616b24..cc3907a9c33a 100644
--- a/mm/kmsan/instrumentation.c
+++ b/mm/kmsan/instrumentation.c
@@ -282,7 +282,7 @@  void __msan_poison_alloca(void *address, uintptr_t size, char *descr)
 
 	/* stack_depot_save() may allocate memory. */
 	kmsan_enter_runtime();
-	handle = stack_depot_save(entries, ARRAY_SIZE(entries), GFP_ATOMIC);
+	handle = stack_depot_save(entries, ARRAY_SIZE(entries), __GFP_HIGH);
 	kmsan_leave_runtime();
 
 	kmsan_internal_set_shadow_origin(address, size, -1, handle,