Message ID | c8269e17d8295c223043c2b6bc09b04beab52a18.1701871671.git.dsterba@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Convert btrfs_root::delayed_nodes_tree to xarray | expand |
On Wed, Dec 6, 2023 at 2:23 PM David Sterba <dsterba@suse.com> wrote: > > The radix-tree has been superseded by the xarray > (https://lwn.net/Articles/745073), this patch converts the > btrfs_root::delayed_nodes, the APIs are used in a simple way. > > First idea is to do xa_insert() but this would require GFP_ATOMIC > allocation which we want to avoid if possible. The preload mechanism of > radix-tree can be emulated within the xarray API. > > - xa_reserve() with GFP_NOFS outside of the lock, the reserved entry > is inserted atomically at most once > > - xa_store() under a lock, in case something races in we can detect that > and xa_load() returns a valid pointer > > All uses of xa_load() must check for a valid pointer in case they manage > to get between the xa_reserve() and xa_store(), this is handled in > btrfs_get_delayed_node(). > > Otherwise the functionality is equivalent, xarray implements the > radix-tree and there should be no performance difference. > > The patch continues the efforts started in 253bf57555e451 ("btrfs: turn > delayed_nodes_tree into an XArray") and fixes the problems with locking > and GFP flags 088aea3b97e0ae ("Revert "btrfs: turn delayed_nodes_tree > into an XArray""). > > Signed-off-by: David Sterba <dsterba@suse.com> > --- > fs/btrfs/ctree.h | 6 ++-- > fs/btrfs/delayed-inode.c | 64 ++++++++++++++++++++++------------------ > fs/btrfs/disk-io.c | 3 +- > fs/btrfs/inode.c | 2 +- > 4 files changed, 41 insertions(+), 34 deletions(-) > > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h > index 54fd4eb92745..70e828d33177 100644 > --- a/fs/btrfs/ctree.h > +++ b/fs/btrfs/ctree.h > @@ -227,10 +227,10 @@ struct btrfs_root { > struct rb_root inode_tree; > > /* > - * radix tree that keeps track of delayed nodes of every inode, > - * protected by inode_lock > + * Xarray that keeps track of delayed nodes of every inode, protected > + * by @inode_lock. 
> */ > - struct radix_tree_root delayed_nodes_tree; > + struct xarray delayed_nodes; > /* > * right now this just gets used so that a root has its own devid > * for stat. It may be used for more later > diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c > index 91159dd7355b..0247faf5bb01 100644 > --- a/fs/btrfs/delayed-inode.c > +++ b/fs/btrfs/delayed-inode.c > @@ -71,7 +71,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( > } > > spin_lock(&root->inode_lock); > - node = radix_tree_lookup(&root->delayed_nodes_tree, ino); > + node = xa_load(&root->delayed_nodes, ino); > > if (node) { > if (btrfs_inode->delayed_node) { > @@ -83,9 +83,9 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( > > /* > * It's possible that we're racing into the middle of removing > - * this node from the radix tree. In this case, the refcount > + * this node from the xarray. In this case, the refcount > * was zero and it should never go back to one. Just return > - * NULL like it was never in the radix at all; our release > + * NULL like it was never in the xarray at all; our release > * function is in the process of removing it. > * > * Some implementations of refcount_inc refuse to bump the > @@ -93,7 +93,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( > * here, refcount_inc() may decide to just WARN_ONCE() instead > * of actually bumping the refcount. > * > - * If this node is properly in the radix, we want to bump the > + * If this node is properly in the xarray, we want to bump the > * refcount twice, once for the inode and once for this get > * operation. 
> */ > @@ -120,6 +120,7 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( > struct btrfs_root *root = btrfs_inode->root; > u64 ino = btrfs_ino(btrfs_inode); > int ret; > + void *ptr; > > again: > node = btrfs_get_delayed_node(btrfs_inode); > @@ -131,26 +132,30 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( > return ERR_PTR(-ENOMEM); > btrfs_init_delayed_node(node, root, ino); > > - /* cached in the btrfs inode and can be accessed */ > + /* Cached in the inode and can be accessed. */ > refcount_set(&node->refs, 2); > > - ret = radix_tree_preload(GFP_NOFS); > - if (ret) { > + /* Allocate and reserve the slot, from now it can return a NULL on xa_load(). */ > + ret = xa_reserve(&root->delayed_nodes, ino, GFP_NOFS); > + if (ret == -ENOMEM) { > kmem_cache_free(delayed_node_cache, node); > - return ERR_PTR(ret); > + return ERR_PTR(-ENOMEM); > } > - > spin_lock(&root->inode_lock); > - ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node); > - if (ret == -EEXIST) { > + ptr = xa_load(&root->delayed_nodes, ino); > + if (ptr) { > + /* Somebody inserted it, go back and read it. */ > spin_unlock(&root->inode_lock); > kmem_cache_free(delayed_node_cache, node); > - radix_tree_preload_end(); > + node = NULL; > goto again; > } > + ptr = xa_store(&root->delayed_nodes, ino, node, GFP_ATOMIC); > + ASSERT(xa_err(ptr) != -EINVAL); > + ASSERT(xa_err(ptr) != -ENOMEM); > + ASSERT(ptr == NULL); > btrfs_inode->delayed_node = node; > spin_unlock(&root->inode_lock); > - radix_tree_preload_end(); > > return node; > } > @@ -269,8 +274,7 @@ static void __btrfs_release_delayed_node( > * back up. We can delete it now. 
> */ > ASSERT(refcount_read(&delayed_node->refs) == 0); > - radix_tree_delete(&root->delayed_nodes_tree, > - delayed_node->inode_id); > + xa_erase(&root->delayed_nodes, delayed_node->inode_id); > spin_unlock(&root->inode_lock); > kmem_cache_free(delayed_node_cache, delayed_node); > } > @@ -2038,34 +2042,36 @@ void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode) > > void btrfs_kill_all_delayed_nodes(struct btrfs_root *root) > { > - u64 inode_id = 0; > + unsigned long index = 0; > struct btrfs_delayed_node *delayed_nodes[8]; > - int i, n; > > while (1) { > + struct btrfs_delayed_node *node; > + int count; > + > spin_lock(&root->inode_lock); > - n = radix_tree_gang_lookup(&root->delayed_nodes_tree, > - (void **)delayed_nodes, inode_id, > - ARRAY_SIZE(delayed_nodes)); > - if (!n) { > + if (xa_empty(&root->delayed_nodes)) { > spin_unlock(&root->inode_lock); > - break; > + return; > } > > - inode_id = delayed_nodes[n - 1]->inode_id + 1; > - for (i = 0; i < n; i++) { > + count = 0; > + xa_for_each_start(&root->delayed_nodes, index, node, index) { > /* > * Don't increase refs in case the node is dead and > * about to be removed from the tree in the loop below > */ > - if (!refcount_inc_not_zero(&delayed_nodes[i]->refs)) > - delayed_nodes[i] = NULL; > + if (refcount_inc_not_zero(&node->refs)) { > + delayed_nodes[count] = node; > + count++; > + } So before each xa_for_each_start() call, the delayed_nodes array should be zeroed (make all entries point to NULL). As it is we're leaving stale pointers in it in case we find any with a zero refcount. This was not a problem before, because the radix_tree_gang_lookup() took the array as an argument and did that. Otherwise the changes look good. Thanks. 
> + if (count >= ARRAY_SIZE(delayed_nodes)) > + break; > } > spin_unlock(&root->inode_lock); > + index++; > > - for (i = 0; i < n; i++) { > - if (!delayed_nodes[i]) > - continue; > + for (int i = 0; i < count; i++) { > __btrfs_kill_delayed_node(delayed_nodes[i]); > btrfs_release_delayed_node(delayed_nodes[i]); > } > diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c > index a1f440cd6d45..5e64316f8026 100644 > --- a/fs/btrfs/disk-io.c > +++ b/fs/btrfs/disk-io.c > @@ -655,7 +655,8 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, > root->nr_delalloc_inodes = 0; > root->nr_ordered_extents = 0; > root->inode_tree = RB_ROOT; > - INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); > + /* GFP flags are compatible with XA_FLAGS_*. */ > + xa_init_flags(&root->delayed_nodes, GFP_ATOMIC); > > btrfs_init_root_block_rsv(root); > > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index 4fa7fabca2c5..4e8c82e5d7a6 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -3805,7 +3805,7 @@ static int btrfs_read_locked_inode(struct inode *inode, > * cache. > * > * This is required for both inode re-read from disk and delayed inode > - * in delayed_nodes_tree. > + * in the delayed_nodes xarray. > */ > if (BTRFS_I(inode)->last_trans == btrfs_get_fs_generation(fs_info)) > set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, > -- > 2.42.1 > >
On Wed, Dec 6, 2023 at 3:52 PM Filipe Manana <fdmanana@kernel.org> wrote: > > On Wed, Dec 6, 2023 at 2:23 PM David Sterba <dsterba@suse.com> wrote: > > > > The radix-tree has been superseded by the xarray > > (https://lwn.net/Articles/745073), this patch converts the > > btrfs_root::delayed_nodes, the APIs are used in a simple way. > > > > First idea is to do xa_insert() but this would require GFP_ATOMIC > > allocation which we want to avoid if possible. The preload mechanism of > > radix-tree can be emulated within the xarray API. > > > > - xa_reserve() with GFP_NOFS outside of the lock, the reserved entry > > is inserted atomically at most once > > > > - xa_store() under a lock, in case something races in we can detect that > > and xa_load() returns a valid pointer > > > > All uses of xa_load() must check for a valid pointer in case they manage > > to get between the xa_reserve() and xa_store(), this is handled in > > btrfs_get_delayed_node(). > > > > Otherwise the functionality is equivalent, xarray implements the > > radix-tree and there should be no performance difference. > > > > The patch continues the efforts started in 253bf57555e451 ("btrfs: turn > > delayed_nodes_tree into an XArray") and fixes the problems with locking > > and GFP flags 088aea3b97e0ae ("Revert "btrfs: turn delayed_nodes_tree > > into an XArray""). 
> > > > Signed-off-by: David Sterba <dsterba@suse.com> > > --- > > fs/btrfs/ctree.h | 6 ++-- > > fs/btrfs/delayed-inode.c | 64 ++++++++++++++++++++++------------------ > > fs/btrfs/disk-io.c | 3 +- > > fs/btrfs/inode.c | 2 +- > > 4 files changed, 41 insertions(+), 34 deletions(-) > > > > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h > > index 54fd4eb92745..70e828d33177 100644 > > --- a/fs/btrfs/ctree.h > > +++ b/fs/btrfs/ctree.h > > @@ -227,10 +227,10 @@ struct btrfs_root { > > struct rb_root inode_tree; > > > > /* > > - * radix tree that keeps track of delayed nodes of every inode, > > - * protected by inode_lock > > + * Xarray that keeps track of delayed nodes of every inode, protected > > + * by @inode_lock. > > */ > > - struct radix_tree_root delayed_nodes_tree; > > + struct xarray delayed_nodes; > > /* > > * right now this just gets used so that a root has its own devid > > * for stat. It may be used for more later > > diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c > > index 91159dd7355b..0247faf5bb01 100644 > > --- a/fs/btrfs/delayed-inode.c > > +++ b/fs/btrfs/delayed-inode.c > > @@ -71,7 +71,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( > > } > > > > spin_lock(&root->inode_lock); > > - node = radix_tree_lookup(&root->delayed_nodes_tree, ino); > > + node = xa_load(&root->delayed_nodes, ino); > > > > if (node) { > > if (btrfs_inode->delayed_node) { > > @@ -83,9 +83,9 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( > > > > /* > > * It's possible that we're racing into the middle of removing > > - * this node from the radix tree. In this case, the refcount > > + * this node from the xarray. In this case, the refcount > > * was zero and it should never go back to one. Just return > > - * NULL like it was never in the radix at all; our release > > + * NULL like it was never in the xarray at all; our release > > * function is in the process of removing it. 
> > * > > * Some implementations of refcount_inc refuse to bump the > > @@ -93,7 +93,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( > > * here, refcount_inc() may decide to just WARN_ONCE() instead > > * of actually bumping the refcount. > > * > > - * If this node is properly in the radix, we want to bump the > > + * If this node is properly in the xarray, we want to bump the > > * refcount twice, once for the inode and once for this get > > * operation. > > */ > > @@ -120,6 +120,7 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( > > struct btrfs_root *root = btrfs_inode->root; > > u64 ino = btrfs_ino(btrfs_inode); > > int ret; > > + void *ptr; > > > > again: > > node = btrfs_get_delayed_node(btrfs_inode); > > @@ -131,26 +132,30 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( > > return ERR_PTR(-ENOMEM); > > btrfs_init_delayed_node(node, root, ino); > > > > - /* cached in the btrfs inode and can be accessed */ > > + /* Cached in the inode and can be accessed. */ > > refcount_set(&node->refs, 2); > > > > - ret = radix_tree_preload(GFP_NOFS); > > - if (ret) { > > + /* Allocate and reserve the slot, from now it can return a NULL on xa_load(). */ > > + ret = xa_reserve(&root->delayed_nodes, ino, GFP_NOFS); > > + if (ret == -ENOMEM) { > > kmem_cache_free(delayed_node_cache, node); > > - return ERR_PTR(ret); > > + return ERR_PTR(-ENOMEM); > > } > > - > > spin_lock(&root->inode_lock); > > - ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node); > > - if (ret == -EEXIST) { > > + ptr = xa_load(&root->delayed_nodes, ino); > > + if (ptr) { > > + /* Somebody inserted it, go back and read it. 
*/ > > spin_unlock(&root->inode_lock); > > kmem_cache_free(delayed_node_cache, node); > > - radix_tree_preload_end(); > > + node = NULL; > > goto again; > > } > > + ptr = xa_store(&root->delayed_nodes, ino, node, GFP_ATOMIC); > > + ASSERT(xa_err(ptr) != -EINVAL); > > + ASSERT(xa_err(ptr) != -ENOMEM); > > + ASSERT(ptr == NULL); > > btrfs_inode->delayed_node = node; > > spin_unlock(&root->inode_lock); > > - radix_tree_preload_end(); > > > > return node; > > } > > @@ -269,8 +274,7 @@ static void __btrfs_release_delayed_node( > > * back up. We can delete it now. > > */ > > ASSERT(refcount_read(&delayed_node->refs) == 0); > > - radix_tree_delete(&root->delayed_nodes_tree, > > - delayed_node->inode_id); > > + xa_erase(&root->delayed_nodes, delayed_node->inode_id); > > spin_unlock(&root->inode_lock); > > kmem_cache_free(delayed_node_cache, delayed_node); > > } > > @@ -2038,34 +2042,36 @@ void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode) > > > > void btrfs_kill_all_delayed_nodes(struct btrfs_root *root) > > { > > - u64 inode_id = 0; > > + unsigned long index = 0; > > struct btrfs_delayed_node *delayed_nodes[8]; > > - int i, n; > > > > while (1) { > > + struct btrfs_delayed_node *node; > > + int count; > > + > > spin_lock(&root->inode_lock); > > - n = radix_tree_gang_lookup(&root->delayed_nodes_tree, > > - (void **)delayed_nodes, inode_id, > > - ARRAY_SIZE(delayed_nodes)); > > - if (!n) { > > + if (xa_empty(&root->delayed_nodes)) { > > spin_unlock(&root->inode_lock); > > - break; > > + return; > > } > > > > - inode_id = delayed_nodes[n - 1]->inode_id + 1; > > - for (i = 0; i < n; i++) { > > + count = 0; > > + xa_for_each_start(&root->delayed_nodes, index, node, index) { > > /* > > * Don't increase refs in case the node is dead and > > * about to be removed from the tree in the loop below > > */ > > - if (!refcount_inc_not_zero(&delayed_nodes[i]->refs)) > > - delayed_nodes[i] = NULL; > > + if (refcount_inc_not_zero(&node->refs)) { > > + delayed_nodes[count] = 
node; > > + count++; > > + } > > So before each xa_for_each_start() call, the delayed_nodes array > should be zeroed (make all entries point to NULL). > As it is we're leaving stale pointers in it in case we find any with a > zero refcount. > This was not a problem before, because the radix_tree_gang_lookup() > took the array as an argument and did that. Nevermind, got confused at the diff due to different array indexes before and after the patch. Reviewed-by: Filipe Manana <fdmanana@suse.com> Looks good, thanks. > > Otherwise the changes look good. > > Thanks. > > > > + if (count >= ARRAY_SIZE(delayed_nodes)) > > + break; > > } > > spin_unlock(&root->inode_lock); > > + index++; > > > > - for (i = 0; i < n; i++) { > > - if (!delayed_nodes[i]) > > - continue; > > + for (int i = 0; i < count; i++) { > > __btrfs_kill_delayed_node(delayed_nodes[i]); > > btrfs_release_delayed_node(delayed_nodes[i]); > > } > > diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c > > index a1f440cd6d45..5e64316f8026 100644 > > --- a/fs/btrfs/disk-io.c > > +++ b/fs/btrfs/disk-io.c > > @@ -655,7 +655,8 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, > > root->nr_delalloc_inodes = 0; > > root->nr_ordered_extents = 0; > > root->inode_tree = RB_ROOT; > > - INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); > > + /* GFP flags are compatible with XA_FLAGS_*. */ > > + xa_init_flags(&root->delayed_nodes, GFP_ATOMIC); > > > > btrfs_init_root_block_rsv(root); > > > > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > > index 4fa7fabca2c5..4e8c82e5d7a6 100644 > > --- a/fs/btrfs/inode.c > > +++ b/fs/btrfs/inode.c > > @@ -3805,7 +3805,7 @@ static int btrfs_read_locked_inode(struct inode *inode, > > * cache. > > * > > * This is required for both inode re-read from disk and delayed inode > > - * in delayed_nodes_tree. > > + * in the delayed_nodes xarray. 
> > */ > > if (BTRFS_I(inode)->last_trans == btrfs_get_fs_generation(fs_info)) > > set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, > > -- > > 2.42.1 > > > >
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 54fd4eb92745..70e828d33177 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -227,10 +227,10 @@ struct btrfs_root { struct rb_root inode_tree; /* - * radix tree that keeps track of delayed nodes of every inode, - * protected by inode_lock + * Xarray that keeps track of delayed nodes of every inode, protected + * by @inode_lock. */ - struct radix_tree_root delayed_nodes_tree; + struct xarray delayed_nodes; /* * right now this just gets used so that a root has its own devid * for stat. It may be used for more later diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 91159dd7355b..0247faf5bb01 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -71,7 +71,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( } spin_lock(&root->inode_lock); - node = radix_tree_lookup(&root->delayed_nodes_tree, ino); + node = xa_load(&root->delayed_nodes, ino); if (node) { if (btrfs_inode->delayed_node) { @@ -83,9 +83,9 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( /* * It's possible that we're racing into the middle of removing - * this node from the radix tree. In this case, the refcount + * this node from the xarray. In this case, the refcount * was zero and it should never go back to one. Just return - * NULL like it was never in the radix at all; our release + * NULL like it was never in the xarray at all; our release * function is in the process of removing it. * * Some implementations of refcount_inc refuse to bump the @@ -93,7 +93,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( * here, refcount_inc() may decide to just WARN_ONCE() instead * of actually bumping the refcount. * - * If this node is properly in the radix, we want to bump the + * If this node is properly in the xarray, we want to bump the * refcount twice, once for the inode and once for this get * operation. 
*/ @@ -120,6 +120,7 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( struct btrfs_root *root = btrfs_inode->root; u64 ino = btrfs_ino(btrfs_inode); int ret; + void *ptr; again: node = btrfs_get_delayed_node(btrfs_inode); @@ -131,26 +132,30 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( return ERR_PTR(-ENOMEM); btrfs_init_delayed_node(node, root, ino); - /* cached in the btrfs inode and can be accessed */ + /* Cached in the inode and can be accessed. */ refcount_set(&node->refs, 2); - ret = radix_tree_preload(GFP_NOFS); - if (ret) { + /* Allocate and reserve the slot, from now it can return a NULL on xa_load(). */ + ret = xa_reserve(&root->delayed_nodes, ino, GFP_NOFS); + if (ret == -ENOMEM) { kmem_cache_free(delayed_node_cache, node); - return ERR_PTR(ret); + return ERR_PTR(-ENOMEM); } - spin_lock(&root->inode_lock); - ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node); - if (ret == -EEXIST) { + ptr = xa_load(&root->delayed_nodes, ino); + if (ptr) { + /* Somebody inserted it, go back and read it. */ spin_unlock(&root->inode_lock); kmem_cache_free(delayed_node_cache, node); - radix_tree_preload_end(); + node = NULL; goto again; } + ptr = xa_store(&root->delayed_nodes, ino, node, GFP_ATOMIC); + ASSERT(xa_err(ptr) != -EINVAL); + ASSERT(xa_err(ptr) != -ENOMEM); + ASSERT(ptr == NULL); btrfs_inode->delayed_node = node; spin_unlock(&root->inode_lock); - radix_tree_preload_end(); return node; } @@ -269,8 +274,7 @@ static void __btrfs_release_delayed_node( * back up. We can delete it now. 
*/ ASSERT(refcount_read(&delayed_node->refs) == 0); - radix_tree_delete(&root->delayed_nodes_tree, - delayed_node->inode_id); + xa_erase(&root->delayed_nodes, delayed_node->inode_id); spin_unlock(&root->inode_lock); kmem_cache_free(delayed_node_cache, delayed_node); } @@ -2038,34 +2042,36 @@ void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode) void btrfs_kill_all_delayed_nodes(struct btrfs_root *root) { - u64 inode_id = 0; + unsigned long index = 0; struct btrfs_delayed_node *delayed_nodes[8]; - int i, n; while (1) { + struct btrfs_delayed_node *node; + int count; + spin_lock(&root->inode_lock); - n = radix_tree_gang_lookup(&root->delayed_nodes_tree, - (void **)delayed_nodes, inode_id, - ARRAY_SIZE(delayed_nodes)); - if (!n) { + if (xa_empty(&root->delayed_nodes)) { spin_unlock(&root->inode_lock); - break; + return; } - inode_id = delayed_nodes[n - 1]->inode_id + 1; - for (i = 0; i < n; i++) { + count = 0; + xa_for_each_start(&root->delayed_nodes, index, node, index) { /* * Don't increase refs in case the node is dead and * about to be removed from the tree in the loop below */ - if (!refcount_inc_not_zero(&delayed_nodes[i]->refs)) - delayed_nodes[i] = NULL; + if (refcount_inc_not_zero(&node->refs)) { + delayed_nodes[count] = node; + count++; + } + if (count >= ARRAY_SIZE(delayed_nodes)) + break; } spin_unlock(&root->inode_lock); + index++; - for (i = 0; i < n; i++) { - if (!delayed_nodes[i]) - continue; + for (int i = 0; i < count; i++) { __btrfs_kill_delayed_node(delayed_nodes[i]); btrfs_release_delayed_node(delayed_nodes[i]); } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a1f440cd6d45..5e64316f8026 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -655,7 +655,8 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, root->nr_delalloc_inodes = 0; root->nr_ordered_extents = 0; root->inode_tree = RB_ROOT; - INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); + /* GFP flags are compatible with 
XA_FLAGS_*. */ + xa_init_flags(&root->delayed_nodes, GFP_ATOMIC); btrfs_init_root_block_rsv(root); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4fa7fabca2c5..4e8c82e5d7a6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3805,7 +3805,7 @@ static int btrfs_read_locked_inode(struct inode *inode, * cache. * * This is required for both inode re-read from disk and delayed inode - * in delayed_nodes_tree. + * in the delayed_nodes xarray. */ if (BTRFS_I(inode)->last_trans == btrfs_get_fs_generation(fs_info)) set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
The radix-tree has been superseded by the xarray (https://lwn.net/Articles/745073); this patch converts btrfs_root::delayed_nodes, where the APIs are used in a simple way. The first idea was to use xa_insert(), but this would require GFP_ATOMIC allocation, which we want to avoid if possible. The preload mechanism of the radix-tree can be emulated within the xarray API. - xa_reserve() with GFP_NOFS outside of the lock, the reserved entry is inserted atomically at most once - xa_store() under a lock, in case something races in we can detect that and xa_load() returns a valid pointer All uses of xa_load() must check for a valid pointer in case they manage to get between the xa_reserve() and xa_store(); this is handled in btrfs_get_delayed_node(). Otherwise the functionality is equivalent; xarray implements the radix-tree and there should be no performance difference. The patch continues the efforts started in 253bf57555e451 ("btrfs: turn delayed_nodes_tree into an XArray") and fixes the problems with locking and GFP flags from commit 088aea3b97e0ae ("Revert "btrfs: turn delayed_nodes_tree into an XArray""). Signed-off-by: David Sterba <dsterba@suse.com> --- fs/btrfs/ctree.h | 6 ++-- fs/btrfs/delayed-inode.c | 64 ++++++++++++++++++++++------------------ fs/btrfs/disk-io.c | 3 +- fs/btrfs/inode.c | 2 +- 4 files changed, 41 insertions(+), 34 deletions(-)