Message ID | 0f1a834bcb67f4c57885706b54e19d22e64b9ce7.1712748143.git.fdmanana@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: add a shrinker for extent maps | expand |
在 2024/4/10 20:58, fdmanana@kernel.org 写道: > From: Filipe Manana <fdmanana@suse.com> > > Add a per cpu counter that tracks the total number of extent maps that are > in extent trees of inodes that belong to fs trees. This is going to be > used in an upcoming change that adds a shrinker for extent maps. Only > extent maps for fs trees are considered, because for special trees such as > the data relocation tree we don't want to evict their extent maps which > are critical for the relocation to work, and since those are limited, it's > not a concern to have them in memory during the relocation of a block > group. Another case are extent maps for free space cache inodes, which > must always remain in memory, but those are limited (there's only one per > free space cache inode, which means one per block group). > > Signed-off-by: Filipe Manana <fdmanana@suse.com> [...] > --- a/fs/btrfs/extent_map.c > +++ b/fs/btrfs/extent_map.c > @@ -76,6 +76,14 @@ static u64 range_end(u64 start, u64 len) > return start + len; > } > > +static void dec_evictable_extent_maps(struct btrfs_inode *inode) > +{ > + struct btrfs_fs_info *fs_info = inode->root->fs_info; > + > + if (!btrfs_is_testing(fs_info) && is_fstree(btrfs_root_id(inode->root))) > + percpu_counter_dec(&fs_info->evictable_extent_maps); > +} > + > static int tree_insert(struct rb_root_cached *root, struct extent_map *em) > { > struct rb_node **p = &root->rb_root.rb_node; > @@ -223,8 +231,9 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma > return next->block_start == prev->block_start; > } > > -static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) > +static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em) Maybe it would be a little easier to read if the extent_map_tree to btrfs_inode conversion happens in a dedicated patch, just like all the previous ones? Otherwise the introduction of the per-cpu counter looks good to me. Thanks, Qu
On Thu, Apr 11, 2024 at 6:39 AM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote: > > > > 在 2024/4/10 20:58, fdmanana@kernel.org 写道: > > From: Filipe Manana <fdmanana@suse.com> > > > > Add a per cpu counter that tracks the total number of extent maps that are > > in extent trees of inodes that belong to fs trees. This is going to be > > used in an upcoming change that adds a shrinker for extent maps. Only > > extent maps for fs trees are considered, because for special trees such as > > the data relocation tree we don't want to evict their extent maps which > > are critical for the relocation to work, and since those are limited, it's > > not a concern to have them in memory during the relocation of a block > > group. Another case are extent maps for free space cache inodes, which > > must always remain in memory, but those are limited (there's only one per > > free space cache inode, which means one per block group). > > > > Signed-off-by: Filipe Manana <fdmanana@suse.com> > [...] > > --- a/fs/btrfs/extent_map.c > > +++ b/fs/btrfs/extent_map.c > > @@ -76,6 +76,14 @@ static u64 range_end(u64 start, u64 len) > > return start + len; > > } > > > > +static void dec_evictable_extent_maps(struct btrfs_inode *inode) > > +{ > > + struct btrfs_fs_info *fs_info = inode->root->fs_info; > > + > > + if (!btrfs_is_testing(fs_info) && is_fstree(btrfs_root_id(inode->root))) > > + percpu_counter_dec(&fs_info->evictable_extent_maps); > > +} > > + > > static int tree_insert(struct rb_root_cached *root, struct extent_map *em) > > { > > struct rb_node **p = &root->rb_root.rb_node; > > @@ -223,8 +231,9 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma > > return next->block_start == prev->block_start; > > } > > > > -static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) > > +static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em) > > Maybe it would be a little easier to read if the extent_map_tree to > btrfs_inode conversion happens in a dedicated patch, just like all the > previous ones? Sure, I can do that. But it's such a small and trivial change that I didn't think it would make review harder, even because the patch is very small. > > Otherwise the introduction of the per-cpu counter looks good to me. > > Thanks, > Qu
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0474e9b6d302..3c2d35b2062e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1269,6 +1269,8 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) percpu_counter_destroy(&fs_info->dirty_metadata_bytes); percpu_counter_destroy(&fs_info->delalloc_bytes); percpu_counter_destroy(&fs_info->ordered_bytes); + ASSERT(percpu_counter_sum_positive(&fs_info->evictable_extent_maps) == 0); + percpu_counter_destroy(&fs_info->evictable_extent_maps); percpu_counter_destroy(&fs_info->dev_replace.bio_counter); btrfs_free_csum_hash(fs_info); btrfs_free_stripe_hash_table(fs_info); @@ -2848,6 +2850,10 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block if (ret) return ret; + ret = percpu_counter_init(&fs_info->evictable_extent_maps, 0, GFP_KERNEL); + if (ret) + return ret; + ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); if (ret) return ret; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 15817b842c24..2fcf28148a81 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -76,6 +76,14 @@ static u64 range_end(u64 start, u64 len) return start + len; } +static void dec_evictable_extent_maps(struct btrfs_inode *inode) +{ + struct btrfs_fs_info *fs_info = inode->root->fs_info; + + if (!btrfs_is_testing(fs_info) && is_fstree(btrfs_root_id(inode->root))) + percpu_counter_dec(&fs_info->evictable_extent_maps); +} + static int tree_insert(struct rb_root_cached *root, struct extent_map *em) { struct rb_node **p = &root->rb_root.rb_node; @@ -223,8 +231,9 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma return next->block_start == prev->block_start; } -static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) +static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em) { + struct extent_map_tree *tree = &inode->extent_tree; struct extent_map *merge = NULL; struct rb_node *rb; @@ -258,6 +267,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) rb_erase_cached(&merge->rb_node, &tree->map); RB_CLEAR_NODE(&merge->rb_node); free_extent_map(merge); + dec_evictable_extent_maps(inode); } } @@ -272,6 +282,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) em->generation = max(em->generation, merge->generation); em->flags |= EXTENT_FLAG_MERGED; free_extent_map(merge); + dec_evictable_extent_maps(inode); } } @@ -322,7 +333,7 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) em->generation = gen; em->flags &= ~EXTENT_FLAG_PINNED; - try_merge_map(tree, em); + try_merge_map(inode, em); out: write_unlock(&tree->lock); @@ -333,16 +344,14 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen) void clear_em_logging(struct btrfs_inode *inode, struct extent_map *em) { - struct extent_map_tree *tree = &inode->extent_tree; - - lockdep_assert_held_write(&tree->lock); + lockdep_assert_held_write(&inode->extent_tree.lock); em->flags &= ~EXTENT_FLAG_LOGGING; if (extent_map_in_tree(em)) - try_merge_map(tree, em); + try_merge_map(inode, em); } -static inline void setup_extent_mapping(struct extent_map_tree *tree, +static inline void setup_extent_mapping(struct btrfs_inode *inode, struct extent_map *em, int modified) { @@ -351,9 +360,9 @@ static inline void setup_extent_mapping(struct extent_map_tree *tree, ASSERT(list_empty(&em->list)); if (modified) - list_add(&em->list, &tree->modified_extents); + list_add(&em->list, &inode->extent_tree.modified_extents); else - try_merge_map(tree, em); + try_merge_map(inode, em); } /* @@ -373,6 +382,8 @@ static int add_extent_mapping(struct btrfs_inode *inode, struct extent_map *em, int modified) { struct extent_map_tree *tree = &inode->extent_tree; + struct btrfs_root *root = inode->root; + struct btrfs_fs_info *fs_info = root->fs_info; int ret; lockdep_assert_held_write(&tree->lock); @@ -381,7 +392,10 @@ static int add_extent_mapping(struct btrfs_inode *inode, if (ret) return ret; - setup_extent_mapping(tree, em, modified); + setup_extent_mapping(inode, em, modified); + + if (!btrfs_is_testing(fs_info) && is_fstree(btrfs_root_id(root))) + percpu_counter_inc(&fs_info->evictable_extent_maps); return 0; } @@ -468,6 +482,8 @@ void remove_extent_mapping(struct btrfs_inode *inode, struct extent_map *em) if (!(em->flags & EXTENT_FLAG_LOGGING)) list_del_init(&em->list); RB_CLEAR_NODE(&em->rb_node); + + dec_evictable_extent_maps(inode); } static void replace_extent_mapping(struct btrfs_inode *inode, @@ -486,7 +502,7 @@ static void replace_extent_mapping(struct btrfs_inode *inode, rb_replace_node_cached(&cur->rb_node, &new->rb_node, &tree->map); RB_CLEAR_NODE(&cur->rb_node); - setup_extent_mapping(tree, new, modified); + setup_extent_mapping(inode, new, modified); } static struct extent_map *next_extent_map(const struct extent_map *em) diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index 93f5c57ea4e3..534d30dafe32 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -630,6 +630,8 @@ struct btrfs_fs_info { s32 dirty_metadata_batch; s32 delalloc_batch; + struct percpu_counter evictable_extent_maps; + /* Protected by 'trans_lock'. */ struct list_head dirty_cowonly_roots;