diff mbox series

[RFC,2/6] debugfs: annotate debugfs handlers vs. removal with lockdep

Message ID 20231109222251.a62811ebde9b.Ia70a49792c448867fd61b0234e1da507b0f75086@changeid (mailing list archive)
State RFC
Delegated to: Johannes Berg
Headers show
Series debugfs/wifi: locking fixes | expand

Commit Message

Johannes Berg Nov. 9, 2023, 9:22 p.m. UTC
From: Johannes Berg <johannes.berg@intel.com>

When you take a lock in a debugfs handler but also try
to remove the debugfs file under that lock, things can
deadlock since the removal has to wait for all users
to finish.

Add lockdep annotations in debugfs_file_get()/_put()
to catch such issues.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 fs/debugfs/file.c     | 10 ++++++++++
 fs/debugfs/inode.c    | 12 ++++++++++++
 fs/debugfs/internal.h |  6 ++++++
 3 files changed, 28 insertions(+)

Comments

Sergey Senozhatsky Dec. 2, 2023, 6:37 a.m. UTC | #1
On (23/11/09 22:22), Johannes Berg wrote:
> From: Johannes Berg <johannes.berg@intel.com>
> 
> When you take a lock in a debugfs handler but also try
> to remove the debugfs file under that lock, things can
> deadlock since the removal has to wait for all users
> to finish.
> 
> Add lockdep annotations in debugfs_file_get()/_put()
> to catch such issues.

So this triggers when I reset zram device (zsmalloc compiled with
CONFIG_ZSMALLOC_STAT).

debugfs_create_file() and debugfs_remove_recursive() are called
under zram->init_lock, and zsmalloc never calls into zram code.
What I don't really get is where does the
	`debugfs::classes -> zram->init_lock`
dependency come from?

[   47.283364] ======================================================
[   47.284790] WARNING: possible circular locking dependency detected
[   47.286217] 6.7.0-rc3-next-20231201+ #239 Tainted: G                 N
[   47.287723] ------------------------------------------------------
[   47.289145] zram-test.sh/727 is trying to acquire lock:
[   47.290350] ffff88814b824070 (debugfs:classes){++++}-{0:0}, at: remove_one+0x65/0x210
[   47.292202] 
[   47.292202] but task is already holding lock:
[   47.293554] ffff88812fe7ee48 (&sb->s_type->i_mutex_key#2){++++}-{3:3}, at: simple_recursive_removal+0x217/0x3a0
[   47.295895] 
[   47.295895] which lock already depends on the new lock.
[   47.295895] 
[   47.297757] 
[   47.297757] the existing dependency chain (in reverse order) is:
[   47.299498] 
[   47.299498] -> #4 (&sb->s_type->i_mutex_key#2){++++}-{3:3}:
[   47.301129]        down_write+0x40/0x80
[   47.302028]        start_creating+0xa5/0x1b0
[   47.303024]        debugfs_create_dir+0x16/0x240
[   47.304104]        zs_create_pool+0x5da/0x6f0
[   47.305123]        disksize_store+0xce/0x320
[   47.306129]        kernfs_fop_write_iter+0x1cb/0x270
[   47.307279]        vfs_write+0x42f/0x4c0
[   47.308205]        ksys_write+0x8f/0x110
[   47.309129]        do_syscall_64+0x40/0xe0
[   47.310100]        entry_SYSCALL_64_after_hwframe+0x62/0x6a
[   47.311403] 
[   47.311403] -> #3 (&zram->init_lock){++++}-{3:3}:
[   47.312856]        down_write+0x40/0x80
[   47.313755]        zram_reset_device+0x22/0x2b0
[   47.314814]        reset_store+0x15b/0x190
[   47.315788]        kernfs_fop_write_iter+0x1cb/0x270
[   47.316946]        vfs_write+0x42f/0x4c0
[   47.317869]        ksys_write+0x8f/0x110
[   47.318787]        do_syscall_64+0x40/0xe0
[   47.319754]        entry_SYSCALL_64_after_hwframe+0x62/0x6a
[   47.321051] 
[   47.321051] -> #2 (&of->mutex){+.+.}-{3:3}:
[   47.322374]        __mutex_lock+0x97/0x810
[   47.323338]        kernfs_seq_start+0x34/0x190
[   47.324387]        seq_read_iter+0x1e1/0x6c0
[   47.325389]        vfs_read+0x38f/0x420
[   47.326295]        ksys_read+0x8f/0x110
[   47.327200]        do_syscall_64+0x40/0xe0
[   47.328164]        entry_SYSCALL_64_after_hwframe+0x62/0x6a
[   47.329460] 
[   47.329460] -> #1 (&p->lock){+.+.}-{3:3}:
[   47.330740]        __mutex_lock+0x97/0x810
[   47.331717]        seq_read_iter+0x5c/0x6c0
[   47.332696]        seq_read+0xfe/0x140
[   47.333577]        full_proxy_read+0x90/0x110
[   47.334598]        vfs_read+0xfb/0x420
[   47.335482]        ksys_read+0x8f/0x110
[   47.336382]        do_syscall_64+0x40/0xe0
[   47.337344]        entry_SYSCALL_64_after_hwframe+0x62/0x6a
[   47.338636] 
[   47.338636] -> #0 (debugfs:classes){++++}-{0:0}:
[   47.340056]        __lock_acquire+0x20b1/0x3b50
[   47.341117]        lock_acquire+0xe3/0x210
[   47.342072]        remove_one+0x7d/0x210
[   47.342991]        simple_recursive_removal+0x325/0x3a0
[   47.344210]        debugfs_remove+0x40/0x60
[   47.345181]        zs_destroy_pool+0x4e/0x3d0
[   47.346199]        zram_reset_device+0x151/0x2b0
[   47.347272]        reset_store+0x15b/0x190
[   47.348257]        kernfs_fop_write_iter+0x1cb/0x270
[   47.349426]        vfs_write+0x42f/0x4c0
[   47.350350]        ksys_write+0x8f/0x110
[   47.351273]        do_syscall_64+0x40/0xe0
[   47.352239]        entry_SYSCALL_64_after_hwframe+0x62/0x6a
[   47.353536] 
[   47.353536] other info that might help us debug this:
[   47.353536] 
[   47.355381] Chain exists of:
[   47.355381]   debugfs:classes --> &zram->init_lock --> &sb->s_type->i_mutex_key#2
[   47.355381] 
[   47.358105]  Possible unsafe locking scenario:
[   47.358105] 
[   47.359484]        CPU0                    CPU1
[   47.360545]        ----                    ----
[   47.361599]   lock(&sb->s_type->i_mutex_key#2);
[   47.362665]                                lock(&zram->init_lock);
[   47.364146]                                lock(&sb->s_type->i_mutex_key#2);
[   47.365781]   lock(debugfs:classes);
[   47.366626] 
[   47.366626]  *** DEADLOCK ***
[   47.366626] 
[   47.368005] 5 locks held by zram-test.sh/727:
[   47.369028]  #0: ffff88811420c420 (sb_writers#5){.+.+}-{0:0}, at: vfs_write+0xff/0x4c0
[   47.370873]  #1: ffff8881346e1090 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x126/0x270
[   47.372932]  #2: ffff88810b7bfb38 (kn->active#50){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x137/0x270
[   47.375052]  #3: ffff88810b3e60b0 (&zram->init_lock){++++}-{3:3}, at: zram_reset_device+0x22/0x2b0
[   47.377131]  #4: ffff88812fe7ee48 (&sb->s_type->i_mutex_key#2){++++}-{3:3}, at: simple_recursive_removal+0x217/0x3a0
[   47.379599] 
[   47.379599] stack backtrace:
[   47.380619] CPU: 39 PID: 727 Comm: zram-test.sh Tainted: G                 N 6.7.0-rc3-next-20231201+ #239
[   47.382836] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[   47.384976] Call Trace:
[   47.385565]  <TASK>
[   47.386074]  dump_stack_lvl+0x6e/0xa0
[   47.386942]  check_noncircular+0x1b6/0x1e0
[   47.387910]  ? lockdep_unlock+0x96/0x130
[   47.388838]  __lock_acquire+0x20b1/0x3b50
[   47.389789]  ? d_walk+0x3c3/0x410
[   47.390589]  lock_acquire+0xe3/0x210
[   47.391449]  ? remove_one+0x65/0x210
[   47.392301]  ? preempt_count_sub+0x14/0xc0
[   47.393264]  ? _raw_spin_unlock+0x29/0x40
[   47.394214]  ? d_walk+0x3c3/0x410
[   47.395005]  ? remove_one+0x65/0x210
[   47.395866]  remove_one+0x7d/0x210
[   47.396675]  ? remove_one+0x65/0x210
[   47.397523]  ? d_invalidate+0xbe/0x170
[   47.398412]  simple_recursive_removal+0x325/0x3a0
[   47.399521]  ? debugfs_remove+0x60/0x60
[   47.400430]  debugfs_remove+0x40/0x60
[   47.401302]  zs_destroy_pool+0x4e/0x3d0
[   47.402214]  zram_reset_device+0x151/0x2b0
[   47.403187]  reset_store+0x15b/0x190
[   47.404043]  ? sysfs_kf_read+0x170/0x170
[   47.404968]  kernfs_fop_write_iter+0x1cb/0x270
[   47.406015]  vfs_write+0x42f/0x4c0
[   47.406829]  ksys_write+0x8f/0x110
[   47.407642]  do_syscall_64+0x40/0xe0
[   47.408491]  entry_SYSCALL_64_after_hwframe+0x62/0x6a
Johannes Berg Dec. 2, 2023, 10:40 a.m. UTC | #2
On Sat, 2023-12-02 at 15:37 +0900, Sergey Senozhatsky wrote:
> On (23/11/09 22:22), Johannes Berg wrote:
> > From: Johannes Berg <johannes.berg@intel.com>
> > 
> > When you take a lock in a debugfs handler but also try
> > to remove the debugfs file under that lock, things can
> > deadlock since the removal has to wait for all users
> > to finish.
> > 
> > Add lockdep annotations in debugfs_file_get()/_put()
> > to catch such issues.
> 
> So this triggers when I reset zram device (zsmalloc compiled with
> CONFIG_ZSMALLOC_STAT).

I shouldn't have put that into the rc, that was more or less an
accident. I think I'll do a revert.

> debugfs_create_file() and debugfs_remove_recursive() are called
> under zram->init_lock, and zsmalloc never calls into zram code.
> What I don't really get is where does the
> 	`debugfs::classes -> zram->init_lock`
> dependency come from?

"debugfs:classes" means a file is being accessed and "classes" is the
name, so that's 

static int zs_stats_size_show(struct seq_file *s, void *v)

which uses seq_file, so there we have a seq_file lock.

I think eventually the issue is that lockdep isn't telling the various
seq_file instances apart, and you have one that's removed under lock
(classes) and another one that's taking the lock (reset).

Anyway, I'll send a revert, don't think this is ready yet. I was fixing
the wireless debugfs lockdep and had used that to debug it.

johannes
diff mbox series

Patch

diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 23bdfc126b5e..e499d38b1077 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -103,6 +103,12 @@  int debugfs_file_get(struct dentry *dentry)
 			kfree(fsd);
 			fsd = READ_ONCE(dentry->d_fsdata);
 		}
+#ifdef CONFIG_LOCKDEP
+		fsd->lock_name = kasprintf(GFP_KERNEL, "debugfs:%pd", dentry);
+		lockdep_register_key(&fsd->key);
+		lockdep_init_map(&fsd->lockdep_map, fsd->lock_name ?: "debugfs",
+				 &fsd->key, 0);
+#endif
 	}
 
 	/*
@@ -119,6 +125,8 @@  int debugfs_file_get(struct dentry *dentry)
 	if (!refcount_inc_not_zero(&fsd->active_users))
 		return -EIO;
 
+	lock_map_acquire_read(&fsd->lockdep_map);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(debugfs_file_get);
@@ -136,6 +144,8 @@  void debugfs_file_put(struct dentry *dentry)
 {
 	struct debugfs_fsdata *fsd = READ_ONCE(dentry->d_fsdata);
 
+	lock_map_release(&fsd->lockdep_map);
+
 	if (refcount_dec_and_test(&fsd->active_users))
 		complete(&fsd->active_users_drained);
 }
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 269bad87d552..a4c77aafb77b 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -241,6 +241,14 @@  static void debugfs_release_dentry(struct dentry *dentry)
 	if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)
 		return;
 
+	/* check it wasn't an automount or dir */
+	if (fsd && fsd->real_fops) {
+#ifdef CONFIG_LOCKDEP
+		lockdep_unregister_key(&fsd->key);
+		kfree(fsd->lock_name);
+#endif
+	}
+
 	kfree(fsd);
 }
 
@@ -742,6 +750,10 @@  static void __debugfs_file_removed(struct dentry *dentry)
 	fsd = READ_ONCE(dentry->d_fsdata);
 	if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)
 		return;
+
+	lock_map_acquire(&fsd->lockdep_map);
+	lock_map_release(&fsd->lockdep_map);
+
 	if (!refcount_dec_and_test(&fsd->active_users))
 		wait_for_completion(&fsd->active_users_drained);
 }
diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h
index f7c489b5a368..c7d61cfc97d2 100644
--- a/fs/debugfs/internal.h
+++ b/fs/debugfs/internal.h
@@ -7,6 +7,7 @@ 
 
 #ifndef _DEBUGFS_INTERNAL_H_
 #define _DEBUGFS_INTERNAL_H_
+#include <linux/lockdep.h>
 
 struct file_operations;
 
@@ -23,6 +24,11 @@  struct debugfs_fsdata {
 		struct {
 			refcount_t active_users;
 			struct completion active_users_drained;
+#ifdef CONFIG_LOCKDEP
+			struct lockdep_map lockdep_map;
+			struct lock_class_key key;
+			char *lock_name;
+#endif
 		};
 	};
 };