diff mbox series

[v2,5/5] btrfs: Add a lockdep model for the ordered extents wait event

Message ID 20220719040954.3964407-6-iangelak@fb.com (mailing list archive)
State New, archived
Headers show
Series btrfs: Annotate wait events with lockdep | expand

Commit Message

Ioannis Angelakopoulos July 19, 2022, 4:10 a.m. UTC
This wait event is very similar to the pending_ordered wait event in the
sense that it occurs in a different context than the condition signaling
for the event. The signaling occurs in btrfs_remove_ordered_extent() while
the wait event is implemented in btrfs_start_ordered_extent() in
fs/btrfs/ordered-data.c

However, in this case a thread must not acquire the lockdep map for the
ordered extents wait event when the ordered extent is related to a free
space inode. That is because lockdep creates dependencies between locks
acquired both in execution paths related to normal inodes and paths related
to free space inodes, thus leading to false positives.

Also to prevent false positives related to free space inodes and normal
inodes, the lockdep map class for the inode->mapping->invalidate_lock is
reinitialized in load_free_space_cache() in fs/btrfs/free-space-cache.c

Signed-off-by: Ioannis Angelakopoulos <iangelak@fb.com>
---
 fs/btrfs/ctree.h            |  1 +
 fs/btrfs/disk-io.c          |  4 ++++
 fs/btrfs/free-space-cache.c | 11 +++++++++++
 fs/btrfs/inode.c            | 13 +++++++++++++
 fs/btrfs/ordered-data.c     | 18 ++++++++++++++++++
 5 files changed, 47 insertions(+)

Comments

Josef Bacik July 20, 2022, 2:50 p.m. UTC | #1
On Mon, Jul 18, 2022 at 09:10:00PM -0700, Ioannis Angelakopoulos wrote:
> This wait event is very similar to the pending_ordered wait event in the
> sense that it occurs in a different context than the condition signaling
> for the event. The signaling occurs in btrfs_remove_ordered_extent() while
> the wait event is implemented in btrfs_start_ordered_extent() in
> fs/btrfs/ordered-data.c
> 
> However, in this case a thread must not acquire the lockdep map for the
> ordered extents wait event when the ordered extent is related to a free
> space inode. That is because lockdep creates dependencies between locks
> acquired both in execution paths related to normal inodes and paths related
> to free space inodes, thus leading to false positives.
> 
> Also to prevent false positives related to free space inodes and normal
> inodes, the lockdep map class for the inode->mapping->invalidate_lock is
> reinitialized in load_free_space_cache() in fs/btrfs/free-space-cache.c
> 

Make this bit a separate patch, put it before this one with an explanation of
why so we have a commit per logical change.  Thanks,

Josef
diff mbox series

Patch

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 661d19ac41d1..941987ed4c43 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1099,6 +1099,7 @@  struct btrfs_fs_info {
 	struct lockdep_map btrfs_trans_num_extwriters_map;
 	struct lockdep_map btrfs_state_change_map[4];
 	struct lockdep_map btrfs_trans_pending_ordered_map;
+	struct lockdep_map btrfs_ordered_extent_map;
 
 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
 	spinlock_t ref_verify_lock;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e9956cbf653f..8f7a07362efb 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3053,6 +3053,7 @@  void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 	static struct lock_class_key btrfs_trans_sup_committed_key;
 	static struct lock_class_key btrfs_trans_completed_key;
 	static struct lock_class_key btrfs_trans_pending_ordered_key;
+	static struct lock_class_key btrfs_ordered_extent_key;
 
 	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
 	INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
@@ -3104,6 +3105,9 @@  void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 	lockdep_init_map(&fs_info->btrfs_state_change_map[3],
 					 "btrfs_trans_completed",
 					 &btrfs_trans_completed_key, 0);
+	lockdep_init_map(&fs_info->btrfs_ordered_extent_map,
+					 "btrfs_ordered_extent",
+					 &btrfs_ordered_extent_key, 0);
 
 	INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
 	INIT_LIST_HEAD(&fs_info->space_info);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 996da650ecdc..1e41a01a782b 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -914,6 +914,8 @@  static int copy_free_space_cache(struct btrfs_block_group *block_group,
 	return ret;
 }
 
+static struct lock_class_key btrfs_free_space_inode_key;
+
 int load_free_space_cache(struct btrfs_block_group *block_group)
 {
 	struct btrfs_fs_info *fs_info = block_group->fs_info;
@@ -924,6 +926,7 @@  int load_free_space_cache(struct btrfs_block_group *block_group)
 	int ret = 0;
 	bool matched;
 	u64 used = block_group->used;
+	struct address_space *mapping;
 
 	/*
 	 * Because we could potentially discard our loaded free space, we want
@@ -983,6 +986,14 @@  int load_free_space_cache(struct btrfs_block_group *block_group)
 	}
 	spin_unlock(&block_group->lock);
 
+	/*
+	 * Reinitialize the class of the inode->mapping->invalidate_lock for free
+	 * space inodes to prevent false positives related to the ordered extents
+	 * lockdep map.
+	 */
+	mapping = &inode->i_data;
+	lockdep_set_class(&mapping->invalidate_lock, &btrfs_free_space_inode_key);
+
 	ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl,
 				      path, block_group->start);
 	btrfs_free_path(path);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f20740812e5b..36f973ffbd26 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3223,6 +3223,8 @@  int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 		clear_bits |= EXTENT_DELALLOC_NEW;
 
 	freespace_inode = btrfs_is_free_space_inode(inode);
+	if (!freespace_inode)
+		btrfs_lockdep_acquire(fs_info, btrfs_ordered_extent);
 
 	if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
 		ret = -EIO;
@@ -8952,6 +8954,7 @@  void btrfs_destroy_inode(struct inode *vfs_inode)
 	struct btrfs_ordered_extent *ordered;
 	struct btrfs_inode *inode = BTRFS_I(vfs_inode);
 	struct btrfs_root *root = inode->root;
+	bool freespace_inode;
 
 	WARN_ON(!hlist_empty(&vfs_inode->i_dentry));
 	WARN_ON(vfs_inode->i_data.nrpages);
@@ -8973,6 +8976,12 @@  void btrfs_destroy_inode(struct inode *vfs_inode)
 	if (!root)
 		return;
 
+	/*
+	 * If this is a free space inode do not take the ordered extents lockdep
+	 * map.
+	 */
+	freespace_inode = btrfs_is_free_space_inode(inode);
+
 	while (1) {
 		ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
 		if (!ordered)
@@ -8981,6 +8990,10 @@  void btrfs_destroy_inode(struct inode *vfs_inode)
 			btrfs_err(root->fs_info,
 				  "found ordered extent %llu %llu on inode cleanup",
 				  ordered->file_offset, ordered->num_bytes);
+
+			if (!freespace_inode)
+				btrfs_lockdep_acquire(root->fs_info, btrfs_ordered_extent);
+
 			btrfs_remove_ordered_extent(inode, ordered);
 			btrfs_put_ordered_extent(ordered);
 			btrfs_put_ordered_extent(ordered);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 2a4cb6db42d1..eb24a6d20ff8 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -524,6 +524,13 @@  void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct rb_node *node;
 	bool pending;
+	bool freespace_inode;
+
+	/*
+	 * If this is a free space inode the thread has not acquired the ordered
+	 * extents lockdep map.
+	 */
+	freespace_inode = btrfs_is_free_space_inode(btrfs_inode);
 
 	btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered);
 	/* This is paired with btrfs_add_ordered_extent. */
@@ -597,6 +604,8 @@  void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
 	}
 	spin_unlock(&root->ordered_extent_lock);
 	wake_up(&entry->wait);
+	if (!freespace_inode)
+		btrfs_lockdep_release(fs_info, btrfs_ordered_extent);
 }
 
 static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
@@ -715,9 +724,16 @@  void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry, int wait)
 	u64 start = entry->file_offset;
 	u64 end = start + entry->num_bytes - 1;
 	struct btrfs_inode *inode = BTRFS_I(entry->inode);
+	bool freespace_inode;
 
 	trace_btrfs_ordered_extent_start(inode, entry);
 
+	/*
+	 * If this is a free space inode do not take the ordered extents lockdep
+	 * map.
+	 */
+	freespace_inode = btrfs_is_free_space_inode(inode);
+
 	/*
 	 * pages in the range can be dirty, clean or writeback.  We
 	 * start IO on any dirty ones so the wait doesn't stall waiting
@@ -726,6 +742,8 @@  void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry, int wait)
 	if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
 		filemap_fdatawrite_range(inode->vfs_inode.i_mapping, start, end);
 	if (wait) {
+		if (!freespace_inode)
+			btrfs_might_wait_for_event(inode->root->fs_info, btrfs_ordered_extent);
 		wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
 						 &entry->flags));
 	}