
[08/11] btrfs: add a global per cpu counter to track number of used extent maps

Message ID 0f1a834bcb67f4c57885706b54e19d22e64b9ce7.1712748143.git.fdmanana@suse.com
State New
Series btrfs: add a shrinker for extent maps

Commit Message

Filipe Manana April 10, 2024, 11:28 a.m. UTC
From: Filipe Manana <fdmanana@suse.com>

Add a per cpu counter that tracks the total number of extent maps that are
in extent trees of inodes that belong to fs trees. This is going to be
used in an upcoming change that adds a shrinker for extent maps. Only
extent maps for fs trees are considered, because for special trees such as
the data relocation tree we don't want to evict their extent maps, which
are critical for the relocation to work, and since those are limited, it's
not a concern to have them in memory during the relocation of a block
group. Another case is extent maps for free space cache inodes, which
must always remain in memory, but those are limited (there's only one per
free space cache inode, which means one per block group).

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/disk-io.c    |  6 ++++++
 fs/btrfs/extent_map.c | 38 +++++++++++++++++++++++++++-----------
 fs/btrfs/fs.h         |  2 ++
 3 files changed, 35 insertions(+), 11 deletions(-)
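
For context on the "fs trees only" filter above: the patch gates the
counter on is_fstree(), which boils down to a root ID check. Below is a
simplified paraphrase of that logic, for illustration only; the real
is_fstree() in the btrfs headers is the authoritative version and also
handles qgroup levels. It shows why the data relocation tree and the root
tree (which holds the free space cache inodes) are never counted.

static bool counts_as_evictable(u64 root_id)
{
	/* The default subvolume's fs tree. */
	if (root_id == BTRFS_FS_TREE_OBJECTID)
		return true;
	/*
	 * Subvolumes and snapshots get root IDs starting at
	 * BTRFS_FIRST_FREE_OBJECTID (256). Special trees fall outside
	 * this range: the data relocation tree uses a "negative" u64
	 * objectid, and free space cache inodes live in the root tree
	 * (objectid 1), so neither passes this check.
	 */
	return (s64)root_id >= (s64)BTRFS_FIRST_FREE_OBJECTID;
}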

Comments

Qu Wenruo April 11, 2024, 5:39 a.m. UTC | #1
On 2024/4/10 20:58, fdmanana@kernel.org wrote:
> From: Filipe Manana <fdmanana@suse.com>
>
> Add a per cpu counter that tracks the total number of extent maps that are
> in extent trees of inodes that belong to fs trees. This is going to be
> used in an upcoming change that adds a shrinker for extent maps. Only
> extent maps for fs trees are considered, because for special trees such as
> the data relocation tree we don't want to evict their extent maps, which
> are critical for the relocation to work, and since those are limited, it's
> not a concern to have them in memory during the relocation of a block
> group. Another case is extent maps for free space cache inodes, which
> must always remain in memory, but those are limited (there's only one per
> free space cache inode, which means one per block group).
>
> Signed-off-by: Filipe Manana <fdmanana@suse.com>
[...]
> --- a/fs/btrfs/extent_map.c
> +++ b/fs/btrfs/extent_map.c
> @@ -76,6 +76,14 @@ static u64 range_end(u64 start, u64 len)
>   	return start + len;
>   }
>
> +static void dec_evictable_extent_maps(struct btrfs_inode *inode)
> +{
> +	struct btrfs_fs_info *fs_info = inode->root->fs_info;
> +
> +	if (!btrfs_is_testing(fs_info) && is_fstree(btrfs_root_id(inode->root)))
> +		percpu_counter_dec(&fs_info->evictable_extent_maps);
> +}
> +
>   static int tree_insert(struct rb_root_cached *root, struct extent_map *em)
>   {
>   	struct rb_node **p = &root->rb_root.rb_node;
> @@ -223,8 +231,9 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma
>   	return next->block_start == prev->block_start;
>   }
>
> -static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
> +static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)

Maybe it would be a little easier to read if the extent_map_tree to
btrfs_inode conversion happens in a dedicated patch, just like all the
previous ones?

Otherwise the introduction of the per-cpu counter looks good to me.

Thanks,
Qu
Filipe Manana April 11, 2024, 10:09 a.m. UTC | #2
On Thu, Apr 11, 2024 at 6:39 AM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote:
>
>
>
> On 2024/4/10 20:58, fdmanana@kernel.org wrote:
> > From: Filipe Manana <fdmanana@suse.com>
> >
> > Add a per cpu counter that tracks the total number of extent maps that are
> > in extent trees of inodes that belong to fs trees. This is going to be
> > used in an upcoming change that adds a shrinker for extent maps. Only
> > extent maps for fs trees are considered, because for special trees such as
> > the data relocation tree we don't want to evict their extent maps, which
> > are critical for the relocation to work, and since those are limited, it's
> > not a concern to have them in memory during the relocation of a block
> > group. Another case is extent maps for free space cache inodes, which
> > must always remain in memory, but those are limited (there's only one per
> > free space cache inode, which means one per block group).
> >
> > Signed-off-by: Filipe Manana <fdmanana@suse.com>
> [...]
> > --- a/fs/btrfs/extent_map.c
> > +++ b/fs/btrfs/extent_map.c
> > @@ -76,6 +76,14 @@ static u64 range_end(u64 start, u64 len)
> >       return start + len;
> >   }
> >
> > +static void dec_evictable_extent_maps(struct btrfs_inode *inode)
> > +{
> > +     struct btrfs_fs_info *fs_info = inode->root->fs_info;
> > +
> > +     if (!btrfs_is_testing(fs_info) && is_fstree(btrfs_root_id(inode->root)))
> > +             percpu_counter_dec(&fs_info->evictable_extent_maps);
> > +}
> > +
> >   static int tree_insert(struct rb_root_cached *root, struct extent_map *em)
> >   {
> >       struct rb_node **p = &root->rb_root.rb_node;
> > @@ -223,8 +231,9 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma
> >       return next->block_start == prev->block_start;
> >   }
> >
> > -static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
> > +static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
>
> Maybe it would be a little easier to read if the extent_map_tree to
> btrfs_inode conversion happens in a dedicated patch, just like all the
> previous ones?

Sure, I can do that. But it's such a small and trivial change that I
didn't think it would make review harder, especially since the patch is
very small.

>
> Otherwise the introduction of the per-cpu counter looks good to me.
>
> Thanks,
> Qu

Patch

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0474e9b6d302..3c2d35b2062e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1269,6 +1269,8 @@  void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
 	percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
 	percpu_counter_destroy(&fs_info->delalloc_bytes);
 	percpu_counter_destroy(&fs_info->ordered_bytes);
+	ASSERT(percpu_counter_sum_positive(&fs_info->evictable_extent_maps) == 0);
+	percpu_counter_destroy(&fs_info->evictable_extent_maps);
 	percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
 	btrfs_free_csum_hash(fs_info);
 	btrfs_free_stripe_hash_table(fs_info);
@@ -2848,6 +2850,10 @@  static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
 	if (ret)
 		return ret;
 
+	ret = percpu_counter_init(&fs_info->evictable_extent_maps, 0, GFP_KERNEL);
+	if (ret)
+		return ret;
+
 	ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
 	if (ret)
 		return ret;
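
As a side note on why a per cpu counter fits here (a general property of
the <linux/percpu_counter.h> API, not something this patch changes):
increments and decrements only touch the local CPU's slot, so the hot
paths in extent_map.c that add and remove extent maps stay cheap, while a
reader that needs an accurate total, like the ASSERT in
btrfs_free_fs_info() above, pays for a cross-CPU sum. A minimal sketch of
that usage pattern:

#include <linux/percpu_counter.h>

static struct percpu_counter demo_counter;

static int demo_init(void)
{
	/* Allocates the per-CPU slots; mirrors init_mount_fs_info(). */
	return percpu_counter_init(&demo_counter, 0, GFP_KERNEL);
}

static void demo_update(void)
{
	/* Cheap: each op updates only the local CPU's slot. */
	percpu_counter_inc(&demo_counter);
	percpu_counter_dec(&demo_counter);
}

static void demo_destroy(void)
{
	/* Exact but more expensive: folds in every CPU's slot. */
	WARN_ON(percpu_counter_sum_positive(&demo_counter) != 0);
	/* Mirrors btrfs_free_fs_info(); only once all users are gone. */
	percpu_counter_destroy(&demo_counter);
}
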
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 15817b842c24..2fcf28148a81 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -76,6 +76,14 @@  static u64 range_end(u64 start, u64 len)
 	return start + len;
 }
 
+static void dec_evictable_extent_maps(struct btrfs_inode *inode)
+{
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+
+	if (!btrfs_is_testing(fs_info) && is_fstree(btrfs_root_id(inode->root)))
+		percpu_counter_dec(&fs_info->evictable_extent_maps);
+}
+
 static int tree_insert(struct rb_root_cached *root, struct extent_map *em)
 {
 	struct rb_node **p = &root->rb_root.rb_node;
@@ -223,8 +231,9 @@  static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma
 	return next->block_start == prev->block_start;
 }
 
-static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
+static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
 {
+	struct extent_map_tree *tree = &inode->extent_tree;
 	struct extent_map *merge = NULL;
 	struct rb_node *rb;
 
@@ -258,6 +267,7 @@  static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
 			rb_erase_cached(&merge->rb_node, &tree->map);
 			RB_CLEAR_NODE(&merge->rb_node);
 			free_extent_map(merge);
+			dec_evictable_extent_maps(inode);
 		}
 	}
 
@@ -272,6 +282,7 @@  static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
 		em->generation = max(em->generation, merge->generation);
 		em->flags |= EXTENT_FLAG_MERGED;
 		free_extent_map(merge);
+		dec_evictable_extent_maps(inode);
 	}
 }
 
@@ -322,7 +333,7 @@  int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
 	em->generation = gen;
 	em->flags &= ~EXTENT_FLAG_PINNED;
 
-	try_merge_map(tree, em);
+	try_merge_map(inode, em);
 
 out:
 	write_unlock(&tree->lock);
@@ -333,16 +344,14 @@  int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
 
 void clear_em_logging(struct btrfs_inode *inode, struct extent_map *em)
 {
-	struct extent_map_tree *tree = &inode->extent_tree;
-
-	lockdep_assert_held_write(&tree->lock);
+	lockdep_assert_held_write(&inode->extent_tree.lock);
 
 	em->flags &= ~EXTENT_FLAG_LOGGING;
 	if (extent_map_in_tree(em))
-		try_merge_map(tree, em);
+		try_merge_map(inode, em);
 }
 
-static inline void setup_extent_mapping(struct extent_map_tree *tree,
+static inline void setup_extent_mapping(struct btrfs_inode *inode,
 					struct extent_map *em,
 					int modified)
 {
@@ -351,9 +360,9 @@  static inline void setup_extent_mapping(struct extent_map_tree *tree,
 	ASSERT(list_empty(&em->list));
 
 	if (modified)
-		list_add(&em->list, &tree->modified_extents);
+		list_add(&em->list, &inode->extent_tree.modified_extents);
 	else
-		try_merge_map(tree, em);
+		try_merge_map(inode, em);
 }
 
 /*
@@ -373,6 +382,8 @@  static int add_extent_mapping(struct btrfs_inode *inode,
 			      struct extent_map *em, int modified)
 {
 	struct extent_map_tree *tree = &inode->extent_tree;
+	struct btrfs_root *root = inode->root;
+	struct btrfs_fs_info *fs_info = root->fs_info;
 	int ret;
 
 	lockdep_assert_held_write(&tree->lock);
@@ -381,7 +392,10 @@  static int add_extent_mapping(struct btrfs_inode *inode,
 	if (ret)
 		return ret;
 
-	setup_extent_mapping(tree, em, modified);
+	setup_extent_mapping(inode, em, modified);
+
+	if (!btrfs_is_testing(fs_info) && is_fstree(btrfs_root_id(root)))
+		percpu_counter_inc(&fs_info->evictable_extent_maps);
 
 	return 0;
 }
@@ -468,6 +482,8 @@  void remove_extent_mapping(struct btrfs_inode *inode, struct extent_map *em)
 	if (!(em->flags & EXTENT_FLAG_LOGGING))
 		list_del_init(&em->list);
 	RB_CLEAR_NODE(&em->rb_node);
+
+	dec_evictable_extent_maps(inode);
 }
 
 static void replace_extent_mapping(struct btrfs_inode *inode,
@@ -486,7 +502,7 @@  static void replace_extent_mapping(struct btrfs_inode *inode,
 	rb_replace_node_cached(&cur->rb_node, &new->rb_node, &tree->map);
 	RB_CLEAR_NODE(&cur->rb_node);
 
-	setup_extent_mapping(tree, new, modified);
+	setup_extent_mapping(inode, new, modified);
 }
 
 static struct extent_map *next_extent_map(const struct extent_map *em)
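
One note on the extent_map.c side: the single increment in
add_extent_mapping() open-codes the same condition that
dec_evictable_extent_maps() wraps for the three decrement sites (the two
merge paths and remove_extent_mapping()). A hypothetical counterpart, not
part of this patch, would keep the two directions symmetric:

static void inc_evictable_extent_maps(struct btrfs_inode *inode)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;

	/* Hypothetical helper: same gating as dec_evictable_extent_maps(). */
	if (!btrfs_is_testing(fs_info) && is_fstree(btrfs_root_id(inode->root)))
		percpu_counter_inc(&fs_info->evictable_extent_maps);
}
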
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 93f5c57ea4e3..534d30dafe32 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -630,6 +630,8 @@  struct btrfs_fs_info {
 	s32 dirty_metadata_batch;
 	s32 delalloc_batch;
 
+	struct percpu_counter evictable_extent_maps;
+
 	/* Protected by 'trans_lock'. */
 	struct list_head dirty_cowonly_roots;
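
Finally, since the commit message says the counter feeds an upcoming
shrinker: below is a hypothetical sketch of how such a shrinker's "count
objects" side might consult it. The real shrinker is added later in this
series and may look different; percpu_counter_read_positive() is the
standard way to get a fast, approximate read without the cross-CPU sum.

static unsigned long evictable_extent_maps_count(struct btrfs_fs_info *fs_info)
{
	/* Approximate but cheap; good enough for shrinker accounting. */
	return percpu_counter_read_positive(&fs_info->evictable_extent_maps);
}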