| Message ID | 20231109222251.a62811ebde9b.Ia70a49792c448867fd61b0234e1da507b0f75086@changeid |
|---|---|
| State | RFC |
| Delegated to | Johannes Berg |
| Series | debugfs/wifi: locking fixes |
On (23/11/09 22:22), Johannes Berg wrote:
> From: Johannes Berg <johannes.berg@intel.com>
>
> When you take a lock in a debugfs handler but also try
> to remove the debugfs file under that lock, things can
> deadlock since the removal has to wait for all users
> to finish.
>
> Add lockdep annotations in debugfs_file_get()/_put()
> to catch such issues.

So this triggers when I reset the zram device (zsmalloc compiled with
CONFIG_ZSMALLOC_STAT). debugfs_create_file() and debugfs_remove_recursive()
are called under zram->init_lock, and zsmalloc never calls into zram code.
What I don't really get is where the
`debugfs:classes -> zram->init_lock`
dependency comes from?

[ 47.283364] ======================================================
[ 47.284790] WARNING: possible circular locking dependency detected
[ 47.286217] 6.7.0-rc3-next-20231201+ #239 Tainted: G N
[ 47.287723] ------------------------------------------------------
[ 47.289145] zram-test.sh/727 is trying to acquire lock:
[ 47.290350] ffff88814b824070 (debugfs:classes){++++}-{0:0}, at: remove_one+0x65/0x210
[ 47.292202]
[ 47.292202] but task is already holding lock:
[ 47.293554] ffff88812fe7ee48 (&sb->s_type->i_mutex_key#2){++++}-{3:3}, at: simple_recursive_removal+0x217/0x3a0
[ 47.295895]
[ 47.295895] which lock already depends on the new lock.
[ 47.295895]
[ 47.297757]
[ 47.297757] the existing dependency chain (in reverse order) is:
[ 47.299498]
[ 47.299498] -> #4 (&sb->s_type->i_mutex_key#2){++++}-{3:3}:
[ 47.301129]        down_write+0x40/0x80
[ 47.302028]        start_creating+0xa5/0x1b0
[ 47.303024]        debugfs_create_dir+0x16/0x240
[ 47.304104]        zs_create_pool+0x5da/0x6f0
[ 47.305123]        disksize_store+0xce/0x320
[ 47.306129]        kernfs_fop_write_iter+0x1cb/0x270
[ 47.307279]        vfs_write+0x42f/0x4c0
[ 47.308205]        ksys_write+0x8f/0x110
[ 47.309129]        do_syscall_64+0x40/0xe0
[ 47.310100]        entry_SYSCALL_64_after_hwframe+0x62/0x6a
[ 47.311403]
[ 47.311403] -> #3 (&zram->init_lock){++++}-{3:3}:
[ 47.312856]        down_write+0x40/0x80
[ 47.313755]        zram_reset_device+0x22/0x2b0
[ 47.314814]        reset_store+0x15b/0x190
[ 47.315788]        kernfs_fop_write_iter+0x1cb/0x270
[ 47.316946]        vfs_write+0x42f/0x4c0
[ 47.317869]        ksys_write+0x8f/0x110
[ 47.318787]        do_syscall_64+0x40/0xe0
[ 47.319754]        entry_SYSCALL_64_after_hwframe+0x62/0x6a
[ 47.321051]
[ 47.321051] -> #2 (&of->mutex){+.+.}-{3:3}:
[ 47.322374]        __mutex_lock+0x97/0x810
[ 47.323338]        kernfs_seq_start+0x34/0x190
[ 47.324387]        seq_read_iter+0x1e1/0x6c0
[ 47.325389]        vfs_read+0x38f/0x420
[ 47.326295]        ksys_read+0x8f/0x110
[ 47.327200]        do_syscall_64+0x40/0xe0
[ 47.328164]        entry_SYSCALL_64_after_hwframe+0x62/0x6a
[ 47.329460]
[ 47.329460] -> #1 (&p->lock){+.+.}-{3:3}:
[ 47.330740]        __mutex_lock+0x97/0x810
[ 47.331717]        seq_read_iter+0x5c/0x6c0
[ 47.332696]        seq_read+0xfe/0x140
[ 47.333577]        full_proxy_read+0x90/0x110
[ 47.334598]        vfs_read+0xfb/0x420
[ 47.335482]        ksys_read+0x8f/0x110
[ 47.336382]        do_syscall_64+0x40/0xe0
[ 47.337344]        entry_SYSCALL_64_after_hwframe+0x62/0x6a
[ 47.338636]
[ 47.338636] -> #0 (debugfs:classes){++++}-{0:0}:
[ 47.340056]        __lock_acquire+0x20b1/0x3b50
[ 47.341117]        lock_acquire+0xe3/0x210
[ 47.342072]        remove_one+0x7d/0x210
[ 47.342991]        simple_recursive_removal+0x325/0x3a0
[ 47.344210]        debugfs_remove+0x40/0x60
[ 47.345181]        zs_destroy_pool+0x4e/0x3d0
[ 47.346199]        zram_reset_device+0x151/0x2b0
[ 47.347272]        reset_store+0x15b/0x190
[ 47.348257]        kernfs_fop_write_iter+0x1cb/0x270
[ 47.349426]        vfs_write+0x42f/0x4c0
[ 47.350350]        ksys_write+0x8f/0x110
[ 47.351273]        do_syscall_64+0x40/0xe0
[ 47.352239]        entry_SYSCALL_64_after_hwframe+0x62/0x6a
[ 47.353536]
[ 47.353536] other info that might help us debug this:
[ 47.353536]
[ 47.355381] Chain exists of:
[ 47.355381]   debugfs:classes --> &zram->init_lock --> &sb->s_type->i_mutex_key#2
[ 47.355381]
[ 47.358105]  Possible unsafe locking scenario:
[ 47.358105]
[ 47.359484]        CPU0                    CPU1
[ 47.360545]        ----                    ----
[ 47.361599]   lock(&sb->s_type->i_mutex_key#2);
[ 47.362665]                                lock(&zram->init_lock);
[ 47.364146]                                lock(&sb->s_type->i_mutex_key#2);
[ 47.365781]   lock(debugfs:classes);
[ 47.366626]
[ 47.366626]  *** DEADLOCK ***
[ 47.366626]
[ 47.368005] 5 locks held by zram-test.sh/727:
[ 47.369028]  #0: ffff88811420c420 (sb_writers#5){.+.+}-{0:0}, at: vfs_write+0xff/0x4c0
[ 47.370873]  #1: ffff8881346e1090 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x126/0x270
[ 47.372932]  #2: ffff88810b7bfb38 (kn->active#50){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x137/0x270
[ 47.375052]  #3: ffff88810b3e60b0 (&zram->init_lock){++++}-{3:3}, at: zram_reset_device+0x22/0x2b0
[ 47.377131]  #4: ffff88812fe7ee48 (&sb->s_type->i_mutex_key#2){++++}-{3:3}, at: simple_recursive_removal+0x217/0x3a0
[ 47.379599]
[ 47.379599] stack backtrace:
[ 47.380619] CPU: 39 PID: 727 Comm: zram-test.sh Tainted: G N 6.7.0-rc3-next-20231201+ #239
[ 47.382836] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[ 47.384976] Call Trace:
[ 47.385565]  <TASK>
[ 47.386074]  dump_stack_lvl+0x6e/0xa0
[ 47.386942]  check_noncircular+0x1b6/0x1e0
[ 47.387910]  ? lockdep_unlock+0x96/0x130
[ 47.388838]  __lock_acquire+0x20b1/0x3b50
[ 47.389789]  ? d_walk+0x3c3/0x410
[ 47.390589]  lock_acquire+0xe3/0x210
[ 47.391449]  ? remove_one+0x65/0x210
[ 47.392301]  ? preempt_count_sub+0x14/0xc0
[ 47.393264]  ? _raw_spin_unlock+0x29/0x40
[ 47.394214]  ? d_walk+0x3c3/0x410
[ 47.395005]  ? remove_one+0x65/0x210
[ 47.395866]  remove_one+0x7d/0x210
[ 47.396675]  ? remove_one+0x65/0x210
[ 47.397523]  ? d_invalidate+0xbe/0x170
[ 47.398412]  simple_recursive_removal+0x325/0x3a0
[ 47.399521]  ? debugfs_remove+0x60/0x60
[ 47.400430]  debugfs_remove+0x40/0x60
[ 47.401302]  zs_destroy_pool+0x4e/0x3d0
[ 47.402214]  zram_reset_device+0x151/0x2b0
[ 47.403187]  reset_store+0x15b/0x190
[ 47.404043]  ? sysfs_kf_read+0x170/0x170
[ 47.404968]  kernfs_fop_write_iter+0x1cb/0x270
[ 47.406015]  vfs_write+0x42f/0x4c0
[ 47.406829]  ksys_write+0x8f/0x110
[ 47.407642]  do_syscall_64+0x40/0xe0
[ 47.408491]  entry_SYSCALL_64_after_hwframe+0x62/0x6a
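
For reference, the pattern the cover text describes looks roughly like the
following hypothetical driver (a minimal sketch, not zram/zsmalloc code; all
names here are made up): a debugfs read handler takes a driver mutex, and the
same mutex is held around debugfs_remove(), which has to wait for every user
of the file to finish.

#include <linux/debugfs.h>
#include <linux/mutex.h>
#include <linux/seq_file.h>

static DEFINE_MUTEX(demo_lock);
static struct dentry *demo_file;

/* debugfs read handler: runs with demo_lock held */
static int demo_show(struct seq_file *s, void *v)
{
	mutex_lock(&demo_lock);
	seq_puts(s, "stats\n");
	mutex_unlock(&demo_lock);
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(demo);

static void demo_create(void)
{
	demo_file = debugfs_create_file("stats", 0444, NULL, NULL, &demo_fops);
}

/* Removal under the same lock: debugfs_remove() waits for all users of the
 * file to drop their reference, but a reader blocked on demo_lock can never
 * finish, so this can deadlock. The annotation in the patch below lets
 * lockdep report this inversion without having to hit it at runtime.
 */
static void demo_reset(void)
{
	mutex_lock(&demo_lock);
	debugfs_remove(demo_file);
	demo_file = NULL;
	mutex_unlock(&demo_lock);
}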
On Sat, 2023-12-02 at 15:37 +0900, Sergey Senozhatsky wrote:
> On (23/11/09 22:22), Johannes Berg wrote:
> > From: Johannes Berg <johannes.berg@intel.com>
> >
> > When you take a lock in a debugfs handler but also try
> > to remove the debugfs file under that lock, things can
> > deadlock since the removal has to wait for all users
> > to finish.
> >
> > Add lockdep annotations in debugfs_file_get()/_put()
> > to catch such issues.
>
> So this triggers when I reset the zram device (zsmalloc compiled with
> CONFIG_ZSMALLOC_STAT).

I shouldn't have put that into the rc, that was more or less an accident.
I think I'll do a revert.

> debugfs_create_file() and debugfs_remove_recursive() are called
> under zram->init_lock, and zsmalloc never calls into zram code.
> What I don't really get is where the
> `debugfs:classes -> zram->init_lock`
> dependency comes from?

"debugfs:classes" means a file is being accessed and "classes" is the name
of that file, so this is

    static int zs_stats_size_show(struct seq_file *s, void *v)

which uses seq_file, so there we have a seq_file lock. I think the issue
eventually is that lockdep isn't telling the various seq_file instances
apart, and you have one that's removed under the lock (classes) and another
one that takes the lock (reset).

Anyway, I'll send a revert; I don't think this is ready yet. I was fixing
the wireless debugfs locking and had used this annotation to debug it.

johannes
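
For context, the "classes" file mentioned above is the zsmalloc per-pool
stats file, created from zs_create_pool() and removed from zs_destroy_pool(),
i.e. both under zram->init_lock. A paraphrased sketch of how it is wired up
(the real code lives in mm/zsmalloc.c under CONFIG_ZSMALLOC_STAT; field and
helper details here are illustrative, not the exact upstream code):

#include <linux/debugfs.h>
#include <linux/seq_file.h>

static struct dentry *zs_stat_root;	/* debugfs "zsmalloc" directory */

/* The seq_file show handler behind debugfs:classes; while it runs, the
 * file counts as "in use" from debugfs' point of view.
 */
static int zs_stats_size_show(struct seq_file *s, void *v)
{
	/* the real handler walks the pool's size classes and prints
	 * per-class allocation statistics
	 */
	seq_puts(s, "class size ... obj_allocated obj_used\n");
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(zs_stats_size);

/* Called from zs_create_pool(), which zram invokes from disksize_store()
 * under zram->init_lock; the matching debugfs_remove() happens in
 * zs_destroy_pool(), called from zram_reset_device() under the same lock.
 */
static void zs_pool_stat_create(struct zs_pool *pool, const char *name)
{
	pool->stat_dentry = debugfs_create_dir(name, zs_stat_root);
	debugfs_create_file("classes", 0444, pool->stat_dentry, pool,
			    &zs_stats_size_fops);
}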
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 23bdfc126b5e..e499d38b1077 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -103,6 +103,12 @@ int debugfs_file_get(struct dentry *dentry)
 			kfree(fsd);
 			fsd = READ_ONCE(dentry->d_fsdata);
 		}
+#ifdef CONFIG_LOCKDEP
+		fsd->lock_name = kasprintf(GFP_KERNEL, "debugfs:%pd", dentry);
+		lockdep_register_key(&fsd->key);
+		lockdep_init_map(&fsd->lockdep_map, fsd->lock_name ?: "debugfs",
+				 &fsd->key, 0);
+#endif
 	}
 
 	/*
@@ -119,6 +125,8 @@ int debugfs_file_get(struct dentry *dentry)
 	if (!refcount_inc_not_zero(&fsd->active_users))
 		return -EIO;
 
+	lock_map_acquire_read(&fsd->lockdep_map);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(debugfs_file_get);
@@ -136,6 +144,8 @@ void debugfs_file_put(struct dentry *dentry)
 {
 	struct debugfs_fsdata *fsd = READ_ONCE(dentry->d_fsdata);
 
+	lock_map_release(&fsd->lockdep_map);
+
 	if (refcount_dec_and_test(&fsd->active_users))
 		complete(&fsd->active_users_drained);
 }
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 269bad87d552..a4c77aafb77b 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -241,6 +241,14 @@ static void debugfs_release_dentry(struct dentry *dentry)
 	if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)
 		return;
 
+	/* check it wasn't an automount or dir */
+	if (fsd && fsd->real_fops) {
+#ifdef CONFIG_LOCKDEP
+		lockdep_unregister_key(&fsd->key);
+		kfree(fsd->lock_name);
+#endif
+	}
+
 	kfree(fsd);
 }
 
@@ -742,6 +750,10 @@ static void __debugfs_file_removed(struct dentry *dentry)
 	fsd = READ_ONCE(dentry->d_fsdata);
 	if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)
 		return;
+
+	lock_map_acquire(&fsd->lockdep_map);
+	lock_map_release(&fsd->lockdep_map);
+
 	if (!refcount_dec_and_test(&fsd->active_users))
 		wait_for_completion(&fsd->active_users_drained);
 }
diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h
index f7c489b5a368..c7d61cfc97d2 100644
--- a/fs/debugfs/internal.h
+++ b/fs/debugfs/internal.h
@@ -7,6 +7,7 @@
 #ifndef _DEBUGFS_INTERNAL_H_
 #define _DEBUGFS_INTERNAL_H_
 
+#include <linux/lockdep.h>
 
 struct file_operations;
 
@@ -23,6 +24,11 @@ struct debugfs_fsdata {
 		struct {
 			refcount_t active_users;
 			struct completion active_users_drained;
+#ifdef CONFIG_LOCKDEP
+			struct lockdep_map lockdep_map;
+			struct lock_class_key key;
+			char *lock_name;
+#endif
 		};
 	};
 };
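
In short, the patch models "removal must wait for all users of the file" as a
pseudo-lock: each handler invocation holds the file's lockdep map in read
mode between debugfs_file_get() and debugfs_file_put(), and the removal path
does an acquire/release pair in __debugfs_file_removed() before waiting, so
lockdep can connect "file removed while holding lock L" with "handler takes
lock L" even when the wait never actually blocks during testing. The same
pattern in generic form (a sketch only; the demo_* names are made up, the
lockdep primitives are the ones used in the patch):

#include <linux/lockdep.h>

static struct lock_class_key demo_key;
static struct lockdep_map demo_map;

static void demo_annotation_init(void)
{
	lockdep_register_key(&demo_key);
	lockdep_init_map(&demo_map, "demo:file", &demo_key, 0);
}

/* handler entry/exit: mirrors debugfs_file_get()/debugfs_file_put() */
static void demo_handler_enter(void)
{
	lock_map_acquire_read(&demo_map);
}

static void demo_handler_exit(void)
{
	lock_map_release(&demo_map);
}

/* removal path: mirrors __debugfs_file_removed(); the acquire/release pair
 * records the dependency "locks held here -> demo_map" before the real wait
 * for readers to drain.
 */
static void demo_remove(void)
{
	lock_map_acquire(&demo_map);
	lock_map_release(&demo_map);
	/* ... then wait_for_completion() until the last reader exits ... */
}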