mm/slub: fix a deadlock in show_slab_objects()
diff mbox series

Message ID 1570131869-2545-1-git-send-email-cai@lca.pw
State New
Headers show
Series
  • mm/slub: fix a deadlock in show_slab_objects()
Related show

Commit Message

Qian Cai Oct. 3, 2019, 7:44 p.m. UTC
Long time ago, there fixed a similar deadlock in show_slab_objects()
[1]. However, it is apparently due to the commits like 01fb58bcba63
("slab: remove synchronous synchronize_sched() from memcg cache
deactivation path") and 03afc0e25f7f ("slab: get_online_mems for
kmem_cache_{create,destroy,shrink}"), this kind of deadlock is back by
just reading files in /sys/kernel/slab will generate a lockdep splat
below.

Since the "mem_hotplug_lock" here is only to obtain a stable online node
mask while racing with NUMA node hotplug, it is probably fine to do
without it.

WARNING: possible circular locking dependency detected
------------------------------------------------------
cat/5224 is trying to acquire lock:
ffff900012ac3120 (mem_hotplug_lock.rw_sem){++++}, at:
show_slab_objects+0x94/0x3a8

but task is already holding lock:
b8ff009693eee398 (kn->count#45){++++}, at: kernfs_seq_start+0x44/0xf0

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (kn->count#45){++++}:
       lock_acquire+0x31c/0x360
       __kernfs_remove+0x290/0x490
       kernfs_remove+0x30/0x44
       sysfs_remove_dir+0x70/0x88
       kobject_del+0x50/0xb0
       sysfs_slab_unlink+0x2c/0x38
       shutdown_cache+0xa0/0xf0
       kmemcg_cache_shutdown_fn+0x1c/0x34
       kmemcg_workfn+0x44/0x64
       process_one_work+0x4f4/0x950
       worker_thread+0x390/0x4bc
       kthread+0x1cc/0x1e8
       ret_from_fork+0x10/0x18

-> #1 (slab_mutex){+.+.}:
       lock_acquire+0x31c/0x360
       __mutex_lock_common+0x16c/0xf78
       mutex_lock_nested+0x40/0x50
       memcg_create_kmem_cache+0x38/0x16c
       memcg_kmem_cache_create_func+0x3c/0x70
       process_one_work+0x4f4/0x950
       worker_thread+0x390/0x4bc
       kthread+0x1cc/0x1e8
       ret_from_fork+0x10/0x18

-> #0 (mem_hotplug_lock.rw_sem){++++}:
       validate_chain+0xd10/0x2bcc
       __lock_acquire+0x7f4/0xb8c
       lock_acquire+0x31c/0x360
       get_online_mems+0x54/0x150
       show_slab_objects+0x94/0x3a8
       total_objects_show+0x28/0x34
       slab_attr_show+0x38/0x54
       sysfs_kf_seq_show+0x198/0x2d4
       kernfs_seq_show+0xa4/0xcc
       seq_read+0x30c/0x8a8
       kernfs_fop_read+0xa8/0x314
       __vfs_read+0x88/0x20c
       vfs_read+0xd8/0x10c
       ksys_read+0xb0/0x120
       __arm64_sys_read+0x54/0x88
       el0_svc_handler+0x170/0x240
       el0_svc+0x8/0xc

other info that might help us debug this:

Chain exists of:
  mem_hotplug_lock.rw_sem --> slab_mutex --> kn->count#45

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(kn->count#45);
                               lock(slab_mutex);
                               lock(kn->count#45);
  lock(mem_hotplug_lock.rw_sem);

 *** DEADLOCK ***

3 locks held by cat/5224:
 #0: 9eff00095b14b2a0 (&p->lock){+.+.}, at: seq_read+0x4c/0x8a8
 #1: 0eff008997041480 (&of->mutex){+.+.}, at: kernfs_seq_start+0x34/0xf0
 #2: b8ff009693eee398 (kn->count#45){++++}, at:
kernfs_seq_start+0x44/0xf0

stack backtrace:
Call trace:
 dump_backtrace+0x0/0x248
 show_stack+0x20/0x2c
 dump_stack+0xd0/0x140
 print_circular_bug+0x368/0x380
 check_noncircular+0x248/0x250
 validate_chain+0xd10/0x2bcc
 __lock_acquire+0x7f4/0xb8c
 lock_acquire+0x31c/0x360
 get_online_mems+0x54/0x150
 show_slab_objects+0x94/0x3a8
 total_objects_show+0x28/0x34
 slab_attr_show+0x38/0x54
 sysfs_kf_seq_show+0x198/0x2d4
 kernfs_seq_show+0xa4/0xcc
 seq_read+0x30c/0x8a8
 kernfs_fop_read+0xa8/0x314
 __vfs_read+0x88/0x20c
 vfs_read+0xd8/0x10c
 ksys_read+0xb0/0x120
 __arm64_sys_read+0x54/0x88
 el0_svc_handler+0x170/0x240
 el0_svc+0x8/0xc

[1] http://lkml.iu.edu/hypermail/linux/kernel/1101.0/02850.html

Signed-off-by: Qian Cai <cai@lca.pw>
---
 mm/slub.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

Comments

David Rientjes Oct. 3, 2019, 7:56 p.m. UTC | #1
On Thu, 3 Oct 2019, Qian Cai wrote:

> diff --git a/mm/slub.c b/mm/slub.c
> index 42c1b3af3c98..922cdcf5758a 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -4838,7 +4838,15 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
>  		}
>  	}
>  
> -	get_online_mems();
> +/*
> + * It is not possible to take "mem_hotplug_lock" here, as it has already held
> + * "kernfs_mutex" which could race with the lock order:
> + *
> + * mem_hotplug_lock->slab_mutex->kernfs_mutex
> + *
> + * In the worest case, it might be mis-calculated while doing NUMA node
> + * hotplug, but it shall be corrected by later reads of the same files.
> + */
>  #ifdef CONFIG_SLUB_DEBUG
>  	if (flags & SO_ALL) {
>  		struct kmem_cache_node *n;

No objection to removing the {get,put}_online_mems() but the comment 
doesn't match the kernel style.  I actually don't think we need the 
comment at all, actually.

> @@ -4879,7 +4887,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
>  			x += sprintf(buf + x, " N%d=%lu",
>  					node, nodes[node]);
>  #endif
> -	put_online_mems();
>  	kfree(nodes);
>  	return x + sprintf(buf + x, "\n");
>  }
Qian Cai Oct. 3, 2019, 8:07 p.m. UTC | #2
On Thu, 2019-10-03 at 12:56 -0700, David Rientjes wrote:
> On Thu, 3 Oct 2019, Qian Cai wrote:
> 
> > diff --git a/mm/slub.c b/mm/slub.c
> > index 42c1b3af3c98..922cdcf5758a 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -4838,7 +4838,15 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
> >  		}
> >  	}
> >  
> > -	get_online_mems();
> > +/*
> > + * It is not possible to take "mem_hotplug_lock" here, as it has already held
> > + * "kernfs_mutex" which could race with the lock order:
> > + *
> > + * mem_hotplug_lock->slab_mutex->kernfs_mutex
> > + *
> > + * In the worest case, it might be mis-calculated while doing NUMA node
> > + * hotplug, but it shall be corrected by later reads of the same files.
> > + */
> >  #ifdef CONFIG_SLUB_DEBUG
> >  	if (flags & SO_ALL) {
> >  		struct kmem_cache_node *n;
> 
> No objection to removing the {get,put}_online_mems() but the comment 
> doesn't match the kernel style.  I actually don't think we need the 
> comment at all, actually.

I am a bit worry about later someone comes to add the lock back as he/she
figures out that it could get more accurate statistics that way, but I agree it
is probably an overkill.

> 
> > @@ -4879,7 +4887,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
> >  			x += sprintf(buf + x, " N%d=%lu",
> >  					node, nodes[node]);
> >  #endif
> > -	put_online_mems();
> >  	kfree(nodes);
> >  	return x + sprintf(buf + x, "\n");
> >  }
David Rientjes Oct. 3, 2019, 8:27 p.m. UTC | #3
On Thu, 3 Oct 2019, Qian Cai wrote:

> > > diff --git a/mm/slub.c b/mm/slub.c
> > > index 42c1b3af3c98..922cdcf5758a 100644
> > > --- a/mm/slub.c
> > > +++ b/mm/slub.c
> > > @@ -4838,7 +4838,15 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
> > >  		}
> > >  	}
> > >  
> > > -	get_online_mems();
> > > +/*
> > > + * It is not possible to take "mem_hotplug_lock" here, as it has already held
> > > + * "kernfs_mutex" which could race with the lock order:
> > > + *
> > > + * mem_hotplug_lock->slab_mutex->kernfs_mutex
> > > + *
> > > + * In the worest case, it might be mis-calculated while doing NUMA node
> > > + * hotplug, but it shall be corrected by later reads of the same files.
> > > + */
> > >  #ifdef CONFIG_SLUB_DEBUG
> > >  	if (flags & SO_ALL) {
> > >  		struct kmem_cache_node *n;
> > 
> > No objection to removing the {get,put}_online_mems() but the comment 
> > doesn't match the kernel style.  I actually don't think we need the 
> > comment at all, actually.
> 
> I am a bit worry about later someone comes to add the lock back as he/she
> figures out that it could get more accurate statistics that way, but I agree it
> is probably an overkill.
> 

Maybe just a small comment that follows the kernel coding style?
Michal Hocko Oct. 4, 2019, 8:13 a.m. UTC | #4
On Thu 03-10-19 15:44:29, Qian Cai wrote:
> Long time ago, there fixed a similar deadlock in show_slab_objects()
> [1]. However, it is apparently due to the commits like 01fb58bcba63
> ("slab: remove synchronous synchronize_sched() from memcg cache
> deactivation path") and 03afc0e25f7f ("slab: get_online_mems for
> kmem_cache_{create,destroy,shrink}"), this kind of deadlock is back by
> just reading files in /sys/kernel/slab will generate a lockdep splat
> below.
> 
> Since the "mem_hotplug_lock" here is only to obtain a stable online node
> mask while racing with NUMA node hotplug, it is probably fine to do
> without it.

"It is probably fine" is not a proper justification. Please have a look
at my older email where I've exaplained why I believe it is safe.

> WARNING: possible circular locking dependency detected
> ------------------------------------------------------

I pressume the deadlock is real. If that is the case then Cc: stable and
Fixes tag would be really appreciated.

> Signed-off-by: Qian Cai <cai@lca.pw>

Anyway, I do agree that this is the right thing to do. With the improved
changelog, fixed up the comment alignment feel free to add
Acked-by: Michal Hocko <mhocko@suse.com>

> ---
>  mm/slub.c | 11 +++++++++--
>  1 file changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index 42c1b3af3c98..922cdcf5758a 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -4838,7 +4838,15 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
>  		}
>  	}
>  
> -	get_online_mems();
> +/*
> + * It is not possible to take "mem_hotplug_lock" here, as it has already held
> + * "kernfs_mutex" which could race with the lock order:
> + *
> + * mem_hotplug_lock->slab_mutex->kernfs_mutex
> + *
> + * In the worest case, it might be mis-calculated while doing NUMA node
> + * hotplug, but it shall be corrected by later reads of the same files.
> + */
>  #ifdef CONFIG_SLUB_DEBUG
>  	if (flags & SO_ALL) {
>  		struct kmem_cache_node *n;
> @@ -4879,7 +4887,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
>  			x += sprintf(buf + x, " N%d=%lu",
>  					node, nodes[node]);
>  #endif
> -	put_online_mems();
>  	kfree(nodes);
>  	return x + sprintf(buf + x, "\n");
>  }
> -- 
> 1.8.3.1
>

Patch
diff mbox series

diff --git a/mm/slub.c b/mm/slub.c
index 42c1b3af3c98..922cdcf5758a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4838,7 +4838,15 @@  static ssize_t show_slab_objects(struct kmem_cache *s,
 		}
 	}
 
-	get_online_mems();
+/*
+ * It is not possible to take "mem_hotplug_lock" here, as it has already held
+ * "kernfs_mutex" which could race with the lock order:
+ *
+ * mem_hotplug_lock->slab_mutex->kernfs_mutex
+ *
+ * In the worest case, it might be mis-calculated while doing NUMA node
+ * hotplug, but it shall be corrected by later reads of the same files.
+ */
 #ifdef CONFIG_SLUB_DEBUG
 	if (flags & SO_ALL) {
 		struct kmem_cache_node *n;
@@ -4879,7 +4887,6 @@  static ssize_t show_slab_objects(struct kmem_cache *s,
 			x += sprintf(buf + x, " N%d=%lu",
 					node, nodes[node]);
 #endif
-	put_online_mems();
 	kfree(nodes);
 	return x + sprintf(buf + x, "\n");
 }