diff mbox series

mm/slab_common: fix slab_caches list corruption after kmem_cache_destroy()

Message ID 20230908230649.802560-1-aquini@redhat.com (mailing list archive)
State New
Headers show
Series mm/slab_common: fix slab_caches list corruption after kmem_cache_destroy() | expand

Commit Message

Rafael Aquini Sept. 8, 2023, 11:06 p.m. UTC
After the commit in Fixes:, if a module that created a slab cache does not
release all of its allocated objects before destroying the cache (at rmmod
time), we might end up releasing the kmem_cache object without removing it
from the slab_caches list thus corrupting the list as kmem_cache_destroy()
ignores the return value from shutdown_cache(), which in turn never removes
the kmem_cache object from slabs_list in case __kmem_cache_shutdown() fails
to release all of the cache's slabs.

This is easily observable on a kernel built with CONFIG_DEBUG_LIST=y
as after that ill release the system will immediately trip on list_add,
or list_del, assertions similar to the one shown below as soon as another
kmem_cache gets created, or destroyed:

  [ 1041.213632] list_del corruption. next->prev should be ffff89f596fb5768, but was 52f1e5016aeee75d. (next=ffff89f595a1b268)
  [ 1041.219165] ------------[ cut here ]------------
  [ 1041.221517] kernel BUG at lib/list_debug.c:62!
  [ 1041.223452] invalid opcode: 0000 [#1] PREEMPT SMP PTI
  [ 1041.225408] CPU: 2 PID: 1852 Comm: rmmod Kdump: loaded Tainted: G    B   W  OE      6.5.0 #15
  [ 1041.228244] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc37 05/24/2023
  [ 1041.231212] RIP: 0010:__list_del_entry_valid+0xae/0xb0

Another quick way to trigger this issue, in a kernel with CONFIG_SLUB=y,
is to set slub_debug to poison the released objects and then just run
cat /proc/slabinfo after removing the module that leaks slab objects,
in which case the kernel will panic:

  [   50.954843] general protection fault, probably for non-canonical address 0xa56b6b6b6b6b6b8b: 0000 [#1] PREEMPT SMP PTI
  [   50.961545] CPU: 2 PID: 1495 Comm: cat Kdump: loaded Tainted: G    B   W  OE      6.5.0 #15
  [   50.966808] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc37 05/24/2023
  [   50.972663] RIP: 0010:get_slabinfo+0x42/0xf0

This patch fixes this issue by properly checking shutdown_cache()'s
return value before taking the kmem_cache_release() branch.

Fixes: 0495e337b703 ("mm/slab_common: Deleting kobject in kmem_cache_destroy() without holding slab_mutex/cpu_hotplug_lock")
Signed-off-by: Rafael Aquini <aquini@redhat.com>
Cc: stable@vger.kernel.org
---
 mm/slab_common.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

Comments

Waiman Long Sept. 8, 2023, 11:21 p.m. UTC | #1
On 9/8/23 19:06, Rafael Aquini wrote:
> After the commit in Fixes:, if a module that created a slab cache does not
> release all of its allocated objects before destroying the cache (at rmmod
> time), we might end up releasing the kmem_cache object without removing it
> from the slab_caches list thus corrupting the list as kmem_cache_destroy()
> ignores the return value from shutdown_cache(), which in turn never removes
> the kmem_cache object from slabs_list in case __kmem_cache_shutdown() fails
> to release all of the cache's slabs.
>
> This is easily observable on a kernel built with CONFIG_DEBUG_LIST=y
> as after that ill release the system will immediately trip on list_add,
> or list_del, assertions similar to the one shown below as soon as another
> kmem_cache gets created, or destroyed:
>
>    [ 1041.213632] list_del corruption. next->prev should be ffff89f596fb5768, but was 52f1e5016aeee75d. (next=ffff89f595a1b268)
>    [ 1041.219165] ------------[ cut here ]------------
>    [ 1041.221517] kernel BUG at lib/list_debug.c:62!
>    [ 1041.223452] invalid opcode: 0000 [#1] PREEMPT SMP PTI
>    [ 1041.225408] CPU: 2 PID: 1852 Comm: rmmod Kdump: loaded Tainted: G    B   W  OE      6.5.0 #15
>    [ 1041.228244] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc37 05/24/2023
>    [ 1041.231212] RIP: 0010:__list_del_entry_valid+0xae/0xb0
>
> Another quick way to trigger this issue, in a kernel with CONFIG_SLUB=y,
> is to set slub_debug to poison the released objects and then just run
> cat /proc/slabinfo after removing the module that leaks slab objects,
> in which case the kernel will panic:
>
>    [   50.954843] general protection fault, probably for non-canonical address 0xa56b6b6b6b6b6b8b: 0000 [#1] PREEMPT SMP PTI
>    [   50.961545] CPU: 2 PID: 1495 Comm: cat Kdump: loaded Tainted: G    B   W  OE      6.5.0 #15
>    [   50.966808] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc37 05/24/2023
>    [   50.972663] RIP: 0010:get_slabinfo+0x42/0xf0
>
> This patch fixes this issue by properly checking shutdown_cache()'s
> return value before taking the kmem_cache_release() branch.
>
> Fixes: 0495e337b703 ("mm/slab_common: Deleting kobject in kmem_cache_destroy() without holding slab_mutex/cpu_hotplug_lock")
> Signed-off-by: Rafael Aquini <aquini@redhat.com>
> Cc: stable@vger.kernel.org
> ---
>   mm/slab_common.c | 13 ++++++++-----
>   1 file changed, 8 insertions(+), 5 deletions(-)
>
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index cd71f9581e67..31e581dc6e85 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -479,7 +479,7 @@ void slab_kmem_cache_release(struct kmem_cache *s)
>   
>   void kmem_cache_destroy(struct kmem_cache *s)
>   {
> -	int refcnt;
> +	int err;
>   	bool rcu_set;
>   
>   	if (unlikely(!s) || !kasan_check_byte(s))
> @@ -490,17 +490,20 @@ void kmem_cache_destroy(struct kmem_cache *s)
>   
>   	rcu_set = s->flags & SLAB_TYPESAFE_BY_RCU;
>   
> -	refcnt = --s->refcount;
> -	if (refcnt)
> +	s->refcount--;
> +	if (s->refcount) {
> +		err = -EBUSY;
>   		goto out_unlock;
> +	}
>   
> -	WARN(shutdown_cache(s),
> +	err = shutdown_cache(s);
> +	WARN(err,
>   	     "%s %s: Slab cache still has objects when called from %pS",
>   	     __func__, s->name, (void *)_RET_IP_);
>   out_unlock:
>   	mutex_unlock(&slab_mutex);
>   	cpus_read_unlock();
> -	if (!refcnt && !rcu_set)
> +	if (!err && !rcu_set)
>   		kmem_cache_release(s);
>   }
>   EXPORT_SYMBOL(kmem_cache_destroy);

Thanks for fixing this corner case.

Reviewed-by: Waiman Long <longman@redhat.com>
Matthew Wilcox Sept. 9, 2023, 1:16 a.m. UTC | #2
On Fri, Sep 08, 2023 at 07:06:49PM -0400, Rafael Aquini wrote:
> This patch fixes this issue by properly checking shutdown_cache()'s
> return value before taking the kmem_cache_release() branch.

Is this the right way to fix this problem?  If the module destroys the
slab cache, it's not going to be possible to free any of the objects
still allocated from the cache.  I feel that we should treat this as
implicitly freeing all the objects that were allocated from the cache
rather than saying the cache is still busy.
Rafael Aquini Sept. 9, 2023, 1:51 a.m. UTC | #3
On Sat, Sep 09, 2023 at 02:16:00AM +0100, Matthew Wilcox wrote:
> On Fri, Sep 08, 2023 at 07:06:49PM -0400, Rafael Aquini wrote:
> > This patch fixes this issue by properly checking shutdown_cache()'s
> > return value before taking the kmem_cache_release() branch.
> 
> Is this the right way to fix this problem?  If the module destroys the
> slab cache, it's not going to be possible to free any of the objects
> still allocated from the cache.  I feel that we should treat this as
> implicitly freeing all the objects that were allocated from the cache
> rather than saying the cache is still busy.
>

Leaving the cache with the unfreeable slabs "alone" is how it was historically 
done, and we have to fix this corner case opened by 0495e337b703 this way to 
address the corruption on stable releases without changing their established 
and expected behavior.

I think your proposal for a different behavior upon cache destruction is
something we should discuss for future releases, but it is orthogonal to
this required follow-up fix.
Vlastimil Babka Sept. 11, 2023, 3:06 p.m. UTC | #4
On 9/9/23 01:06, Rafael Aquini wrote:
> After the commit in Fixes:, if a module that created a slab cache does not
> release all of its allocated objects before destroying the cache (at rmmod
> time), we might end up releasing the kmem_cache object without removing it
> from the slab_caches list thus corrupting the list as kmem_cache_destroy()
> ignores the return value from shutdown_cache(), which in turn never removes
> the kmem_cache object from slabs_list in case __kmem_cache_shutdown() fails
> to release all of the cache's slabs.
> 
> This is easily observable on a kernel built with CONFIG_DEBUG_LIST=y
> as after that ill release the system will immediately trip on list_add,
> or list_del, assertions similar to the one shown below as soon as another
> kmem_cache gets created, or destroyed:
> 
>   [ 1041.213632] list_del corruption. next->prev should be ffff89f596fb5768, but was 52f1e5016aeee75d. (next=ffff89f595a1b268)
>   [ 1041.219165] ------------[ cut here ]------------
>   [ 1041.221517] kernel BUG at lib/list_debug.c:62!
>   [ 1041.223452] invalid opcode: 0000 [#1] PREEMPT SMP PTI
>   [ 1041.225408] CPU: 2 PID: 1852 Comm: rmmod Kdump: loaded Tainted: G    B   W  OE      6.5.0 #15
>   [ 1041.228244] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc37 05/24/2023
>   [ 1041.231212] RIP: 0010:__list_del_entry_valid+0xae/0xb0
> 
> Another quick way to trigger this issue, in a kernel with CONFIG_SLUB=y,
> is to set slub_debug to poison the released objects and then just run
> cat /proc/slabinfo after removing the module that leaks slab objects,
> in which case the kernel will panic:
> 
>   [   50.954843] general protection fault, probably for non-canonical address 0xa56b6b6b6b6b6b8b: 0000 [#1] PREEMPT SMP PTI
>   [   50.961545] CPU: 2 PID: 1495 Comm: cat Kdump: loaded Tainted: G    B   W  OE      6.5.0 #15
>   [   50.966808] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc37 05/24/2023
>   [   50.972663] RIP: 0010:get_slabinfo+0x42/0xf0
> 
> This patch fixes this issue by properly checking shutdown_cache()'s
> return value before taking the kmem_cache_release() branch.
> 
> Fixes: 0495e337b703 ("mm/slab_common: Deleting kobject in kmem_cache_destroy() without holding slab_mutex/cpu_hotplug_lock")
> Signed-off-by: Rafael Aquini <aquini@redhat.com>
> Cc: stable@vger.kernel.org

Thanks, added to slab.git. Tweaked the code a bit:

https://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git/commit/?h=slab/for-6.6/hotfixes&id=46a9ea6681907a3be6b6b0d43776dccc62cad6cf

> ---
>  mm/slab_common.c | 13 ++++++++-----
>  1 file changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index cd71f9581e67..31e581dc6e85 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -479,7 +479,7 @@ void slab_kmem_cache_release(struct kmem_cache *s)
>  
>  void kmem_cache_destroy(struct kmem_cache *s)
>  {
> -	int refcnt;
> +	int err;
>  	bool rcu_set;
>  
>  	if (unlikely(!s) || !kasan_check_byte(s))
> @@ -490,17 +490,20 @@ void kmem_cache_destroy(struct kmem_cache *s)
>  
>  	rcu_set = s->flags & SLAB_TYPESAFE_BY_RCU;
>  
> -	refcnt = --s->refcount;
> -	if (refcnt)
> +	s->refcount--;
> +	if (s->refcount) {
> +		err = -EBUSY;
>  		goto out_unlock;
> +	}
>  
> -	WARN(shutdown_cache(s),
> +	err = shutdown_cache(s);
> +	WARN(err,
>  	     "%s %s: Slab cache still has objects when called from %pS",
>  	     __func__, s->name, (void *)_RET_IP_);
>  out_unlock:
>  	mutex_unlock(&slab_mutex);
>  	cpus_read_unlock();
> -	if (!refcnt && !rcu_set)
> +	if (!err && !rcu_set)
>  		kmem_cache_release(s);
>  }
>  EXPORT_SYMBOL(kmem_cache_destroy);
Rafael Aquini Sept. 11, 2023, 4:47 p.m. UTC | #5
On Mon, Sep 11, 2023 at 05:06:15PM +0200, Vlastimil Babka wrote:
> On 9/9/23 01:06, Rafael Aquini wrote:
> > After the commit in Fixes:, if a module that created a slab cache does not
> > release all of its allocated objects before destroying the cache (at rmmod
> > time), we might end up releasing the kmem_cache object without removing it
> > from the slab_caches list thus corrupting the list as kmem_cache_destroy()
> > ignores the return value from shutdown_cache(), which in turn never removes
> > the kmem_cache object from slabs_list in case __kmem_cache_shutdown() fails
> > to release all of the cache's slabs.
> > 
> > This is easily observable on a kernel built with CONFIG_DEBUG_LIST=y
> > as after that ill release the system will immediately trip on list_add,
> > or list_del, assertions similar to the one shown below as soon as another
> > kmem_cache gets created, or destroyed:
> > 
> >   [ 1041.213632] list_del corruption. next->prev should be ffff89f596fb5768, but was 52f1e5016aeee75d. (next=ffff89f595a1b268)
> >   [ 1041.219165] ------------[ cut here ]------------
> >   [ 1041.221517] kernel BUG at lib/list_debug.c:62!
> >   [ 1041.223452] invalid opcode: 0000 [#1] PREEMPT SMP PTI
> >   [ 1041.225408] CPU: 2 PID: 1852 Comm: rmmod Kdump: loaded Tainted: G    B   W  OE      6.5.0 #15
> >   [ 1041.228244] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc37 05/24/2023
> >   [ 1041.231212] RIP: 0010:__list_del_entry_valid+0xae/0xb0
> > 
> > Another quick way to trigger this issue, in a kernel with CONFIG_SLUB=y,
> > is to set slub_debug to poison the released objects and then just run
> > cat /proc/slabinfo after removing the module that leaks slab objects,
> > in which case the kernel will panic:
> > 
> >   [   50.954843] general protection fault, probably for non-canonical address 0xa56b6b6b6b6b6b8b: 0000 [#1] PREEMPT SMP PTI
> >   [   50.961545] CPU: 2 PID: 1495 Comm: cat Kdump: loaded Tainted: G    B   W  OE      6.5.0 #15
> >   [   50.966808] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc37 05/24/2023
> >   [   50.972663] RIP: 0010:get_slabinfo+0x42/0xf0
> > 
> > This patch fixes this issue by properly checking shutdown_cache()'s
> > return value before taking the kmem_cache_release() branch.
> > 
> > Fixes: 0495e337b703 ("mm/slab_common: Deleting kobject in kmem_cache_destroy() without holding slab_mutex/cpu_hotplug_lock")
> > Signed-off-by: Rafael Aquini <aquini@redhat.com>
> > Cc: stable@vger.kernel.org
> 
> Thanks, added to slab.git. Tweaked the code a bit:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git/commit/?h=slab/for-6.6/hotfixes&id=46a9ea6681907a3be6b6b0d43776dccc62cad6cf
>

Thank you, Vlastimil.

 
> > ---
> >  mm/slab_common.c | 13 ++++++++-----
> >  1 file changed, 8 insertions(+), 5 deletions(-)
> > 
> > diff --git a/mm/slab_common.c b/mm/slab_common.c
> > index cd71f9581e67..31e581dc6e85 100644
> > --- a/mm/slab_common.c
> > +++ b/mm/slab_common.c
> > @@ -479,7 +479,7 @@ void slab_kmem_cache_release(struct kmem_cache *s)
> >  
> >  void kmem_cache_destroy(struct kmem_cache *s)
> >  {
> > -	int refcnt;
> > +	int err;
> >  	bool rcu_set;
> >  
> >  	if (unlikely(!s) || !kasan_check_byte(s))
> > @@ -490,17 +490,20 @@ void kmem_cache_destroy(struct kmem_cache *s)
> >  
> >  	rcu_set = s->flags & SLAB_TYPESAFE_BY_RCU;
> >  
> > -	refcnt = --s->refcount;
> > -	if (refcnt)
> > +	s->refcount--;
> > +	if (s->refcount) {
> > +		err = -EBUSY;
> >  		goto out_unlock;
> > +	}
> >  
> > -	WARN(shutdown_cache(s),
> > +	err = shutdown_cache(s);
> > +	WARN(err,
> >  	     "%s %s: Slab cache still has objects when called from %pS",
> >  	     __func__, s->name, (void *)_RET_IP_);
> >  out_unlock:
> >  	mutex_unlock(&slab_mutex);
> >  	cpus_read_unlock();
> > -	if (!refcnt && !rcu_set)
> > +	if (!err && !rcu_set)
> >  		kmem_cache_release(s);
> >  }
> >  EXPORT_SYMBOL(kmem_cache_destroy);
> 
>
diff mbox series

Patch

diff --git a/mm/slab_common.c b/mm/slab_common.c
index cd71f9581e67..31e581dc6e85 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -479,7 +479,7 @@  void slab_kmem_cache_release(struct kmem_cache *s)
 
 void kmem_cache_destroy(struct kmem_cache *s)
 {
-	int refcnt;
+	int err;
 	bool rcu_set;
 
 	if (unlikely(!s) || !kasan_check_byte(s))
@@ -490,17 +490,20 @@  void kmem_cache_destroy(struct kmem_cache *s)
 
 	rcu_set = s->flags & SLAB_TYPESAFE_BY_RCU;
 
-	refcnt = --s->refcount;
-	if (refcnt)
+	s->refcount--;
+	if (s->refcount) {
+		err = -EBUSY;
 		goto out_unlock;
+	}
 
-	WARN(shutdown_cache(s),
+	err = shutdown_cache(s);
+	WARN(err,
 	     "%s %s: Slab cache still has objects when called from %pS",
 	     __func__, s->name, (void *)_RET_IP_);
 out_unlock:
 	mutex_unlock(&slab_mutex);
 	cpus_read_unlock();
-	if (!refcnt && !rcu_set)
+	if (!err && !rcu_set)
 		kmem_cache_release(s);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);