diff mbox series

[v2,10/10] btrfs: use a regular rb_root instead of cached rb_root for extent_map_tree

Message ID 37b63a8da723a934b72c5fe00b49922fcec5f5c7.1715362104.git.fdmanana@suse.com (mailing list archive)
State New
Headers show
Series btrfs: inode management and memory consumption improvements | expand

Commit Message

Filipe Manana May 10, 2024, 5:32 p.m. UTC
From: Filipe Manana <fdmanana@suse.com>

We are currently using a cached rb_root (struct rb_root_cached) for the
rb root of struct extent_map_tree. This does't offer much of an advantage
here because:

1) It's only advantage over the regular rb_root is that it caches a
   pointer to the left most node (first node), so a call to
   rb_first_cached() doesn't have to chase pointers until it reaches
   the left most node;

2) We only have two scenarios that access left most node with
   rb_first_cached():

      When dropping all extent maps from an inode, during inode eviction;

      When iterating over extent maps during the extent map shrinker;

3) In both cases we keep removing extent maps, which causes deletion of
   the left most node so rb_erase_cached() has to call rb_next() to find
   out what's the next left most node and assign it to
   struct rb_root_cached::rb_leftmost;

4) We can do that ourselves in those two uses cases and stop using a
   rb_root_cached rb tree and use instead a regular rb_root rb tree.

   This reduces the size of struct extent_map_tree by 8 bytes and, since
   this structure is embedded in struct btrfs_inode, it also reduces the
   size of that structure by 8 bytes.

   So on a 64 bits platform the size of btrfs_inode is reduced from 1032
   bytes down to 1024 bytes.

   This means we will be able to have 4 inodes per 4K page instead of 3.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/extent_map.c             | 48 +++++++++++++++++--------------
 fs/btrfs/extent_map.h             |  2 +-
 fs/btrfs/tests/extent-map-tests.c |  6 ++--
 3 files changed, 30 insertions(+), 26 deletions(-)

Comments

David Sterba May 14, 2024, 3:58 p.m. UTC | #1
On Fri, May 10, 2024 at 06:32:58PM +0100, fdmanana@kernel.org wrote:
> From: Filipe Manana <fdmanana@suse.com>
> 
> We are currently using a cached rb_root (struct rb_root_cached) for the
> rb root of struct extent_map_tree. This does't offer much of an advantage
> here because:

Good catch, this one escaped me, we need the RB tree tracking so I did not
consider it, the cached pointer was hidden.
diff mbox series

Patch

diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 4bc41b0dd701..35e163152dbc 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -33,7 +33,7 @@  void __cold extent_map_exit(void)
  */
 void extent_map_tree_init(struct extent_map_tree *tree)
 {
-	tree->root = RB_ROOT_CACHED;
+	tree->root = RB_ROOT;
 	INIT_LIST_HEAD(&tree->modified_extents);
 	rwlock_init(&tree->lock);
 }
@@ -85,27 +85,24 @@  static void dec_evictable_extent_maps(struct btrfs_inode *inode)
 		percpu_counter_dec(&fs_info->evictable_extent_maps);
 }
 
-static int tree_insert(struct rb_root_cached *root, struct extent_map *em)
+static int tree_insert(struct rb_root *root, struct extent_map *em)
 {
-	struct rb_node **p = &root->rb_root.rb_node;
+	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
 	struct extent_map *entry = NULL;
 	struct rb_node *orig_parent = NULL;
 	u64 end = range_end(em->start, em->len);
-	bool leftmost = true;
 
 	while (*p) {
 		parent = *p;
 		entry = rb_entry(parent, struct extent_map, rb_node);
 
-		if (em->start < entry->start) {
+		if (em->start < entry->start)
 			p = &(*p)->rb_left;
-		} else if (em->start >= extent_map_end(entry)) {
+		else if (em->start >= extent_map_end(entry))
 			p = &(*p)->rb_right;
-			leftmost = false;
-		} else {
+		else
 			return -EEXIST;
-		}
 	}
 
 	orig_parent = parent;
@@ -128,7 +125,7 @@  static int tree_insert(struct rb_root_cached *root, struct extent_map *em)
 			return -EEXIST;
 
 	rb_link_node(&em->rb_node, orig_parent, p);
-	rb_insert_color_cached(&em->rb_node, root, leftmost);
+	rb_insert_color(&em->rb_node, root);
 	return 0;
 }
 
@@ -265,7 +262,7 @@  static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
 			em->generation = max(em->generation, merge->generation);
 			em->flags |= EXTENT_FLAG_MERGED;
 
-			rb_erase_cached(&merge->rb_node, &tree->root);
+			rb_erase(&merge->rb_node, &tree->root);
 			RB_CLEAR_NODE(&merge->rb_node);
 			free_extent_map(merge);
 			dec_evictable_extent_maps(inode);
@@ -278,7 +275,7 @@  static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
 	if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) {
 		em->len += merge->len;
 		em->block_len += merge->block_len;
-		rb_erase_cached(&merge->rb_node, &tree->root);
+		rb_erase(&merge->rb_node, &tree->root);
 		RB_CLEAR_NODE(&merge->rb_node);
 		em->generation = max(em->generation, merge->generation);
 		em->flags |= EXTENT_FLAG_MERGED;
@@ -410,7 +407,7 @@  __lookup_extent_mapping(struct extent_map_tree *tree,
 	struct rb_node *prev_or_next = NULL;
 	u64 end = range_end(start, len);
 
-	rb_node = __tree_search(&tree->root.rb_root, start, &prev_or_next);
+	rb_node = __tree_search(&tree->root, start, &prev_or_next);
 	if (!rb_node) {
 		if (prev_or_next)
 			rb_node = prev_or_next;
@@ -479,7 +476,7 @@  void remove_extent_mapping(struct btrfs_inode *inode, struct extent_map *em)
 	lockdep_assert_held_write(&tree->lock);
 
 	WARN_ON(em->flags & EXTENT_FLAG_PINNED);
-	rb_erase_cached(&em->rb_node, &tree->root);
+	rb_erase(&em->rb_node, &tree->root);
 	if (!(em->flags & EXTENT_FLAG_LOGGING))
 		list_del_init(&em->list);
 	RB_CLEAR_NODE(&em->rb_node);
@@ -500,7 +497,7 @@  static void replace_extent_mapping(struct btrfs_inode *inode,
 	ASSERT(extent_map_in_tree(cur));
 	if (!(cur->flags & EXTENT_FLAG_LOGGING))
 		list_del_init(&cur->list);
-	rb_replace_node_cached(&cur->rb_node, &new->rb_node, &tree->root);
+	rb_replace_node(&cur->rb_node, &new->rb_node, &tree->root);
 	RB_CLEAR_NODE(&cur->rb_node);
 
 	setup_extent_mapping(inode, new, modified);
@@ -657,18 +654,23 @@  int btrfs_add_extent_mapping(struct btrfs_inode *inode,
 static void drop_all_extent_maps_fast(struct btrfs_inode *inode)
 {
 	struct extent_map_tree *tree = &inode->extent_tree;
+	struct rb_node *node;
 
 	write_lock(&tree->lock);
-	while (!RB_EMPTY_ROOT(&tree->root.rb_root)) {
+	node = rb_first(&tree->root);
+	while (node) {
 		struct extent_map *em;
-		struct rb_node *node;
+		struct rb_node *next = rb_next(node);
 
-		node = rb_first_cached(&tree->root);
 		em = rb_entry(node, struct extent_map, rb_node);
 		em->flags &= ~(EXTENT_FLAG_PINNED | EXTENT_FLAG_LOGGING);
 		remove_extent_mapping(inode, em);
 		free_extent_map(em);
-		cond_resched_rwlock_write(&tree->lock);
+
+		if (cond_resched_rwlock_write(&tree->lock))
+			node = rb_first(&tree->root);
+		else
+			node = next;
 	}
 	write_unlock(&tree->lock);
 }
@@ -1058,12 +1060,12 @@  static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t
 		return 0;
 
 	write_lock(&tree->lock);
-	node = rb_first_cached(&tree->root);
+	node = rb_first(&tree->root);
 	while (node) {
+		struct rb_node *next = rb_next(node);
 		struct extent_map *em;
 
 		em = rb_entry(node, struct extent_map, rb_node);
-		node = rb_next(node);
 		(*scanned)++;
 
 		if (em->flags & EXTENT_FLAG_PINNED)
@@ -1094,7 +1096,9 @@  static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t
 		 * lock and took it again.
 		 */
 		if (cond_resched_rwlock_write(&tree->lock))
-			node = rb_first_cached(&tree->root);
+			node = rb_first(&tree->root);
+		else
+			node = next;
 	}
 	write_unlock(&tree->lock);
 	up_read(&inode->i_mmap_lock);
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 9c0793888d13..9144721b88a5 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -115,7 +115,7 @@  struct extent_map {
 };
 
 struct extent_map_tree {
-	struct rb_root_cached root;
+	struct rb_root root;
 	struct list_head modified_extents;
 	rwlock_t lock;
 };
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index 075e6930acda..c511a1297956 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -19,8 +19,8 @@  static int free_extent_map_tree(struct btrfs_inode *inode)
 	int ret = 0;
 
 	write_lock(&em_tree->lock);
-	while (!RB_EMPTY_ROOT(&em_tree->root.rb_root)) {
-		node = rb_first_cached(&em_tree->root);
+	while (!RB_EMPTY_ROOT(&em_tree->root)) {
+		node = rb_first(&em_tree->root);
 		em = rb_entry(node, struct extent_map, rb_node);
 		remove_extent_mapping(inode, em);
 
@@ -551,7 +551,7 @@  static int validate_range(struct extent_map_tree *em_tree, int index)
 	struct rb_node *n;
 	int i;
 
-	for (i = 0, n = rb_first_cached(&em_tree->root);
+	for (i = 0, n = rb_first(&em_tree->root);
 	     valid_ranges[index][i].len && n;
 	     i++, n = rb_next(n)) {
 		struct extent_map *entry = rb_entry(n, struct extent_map, rb_node);