btrfs: zoned: properly take lock to read/update BG's zoned variables

Message ID 9eb249aedabfa6008cbf13706b7f3c03dc59855d.1722241885.git.naohiro.aota@wdc.com (mailing list archive)
State New, archived
Series btrfs: zoned: properly take lock to read/update BG's zoned variables

Commit Message

Naohiro Aota July 29, 2024, 8:32 a.m. UTC
__btrfs_add_free_space_zoned() reads and modifies the block group's
alloc_offset, ro, and zone_unusable, but without taking the block group's
lock. This is mostly safe because they monotonically increase (at least for
now) and this function is mostly called from a transaction commit, which is
serialized by itself.

Still, taking the lock is the safer and more correct option, and I'm going
to add a change that resets zone_unusable while a block group is still
alive. So, add locking around the operations.

Fixes: 169e0da91a21 ("btrfs: zoned: track unusable bytes for zones")
CC: stable@vger.kernel.org # 5.15+
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 fs/btrfs/free-space-cache.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

Comments

Filipe Manana July 29, 2024, 11:46 a.m. UTC | #1
On Mon, Jul 29, 2024 at 9:33 AM Naohiro Aota <naohiro.aota@wdc.com> wrote:
>
> [...]
>
> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> index f5996a43db24..51263d6dac36 100644
> --- a/fs/btrfs/free-space-cache.c
> +++ b/fs/btrfs/free-space-cache.c
> @@ -2697,15 +2697,16 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
>         u64 offset = bytenr - block_group->start;
>         u64 to_free, to_unusable;
>         int bg_reclaim_threshold = 0;
> -       bool initial = ((size == block_group->length) && (block_group->alloc_offset == 0));
> +       bool initial;
>         u64 reclaimable_unusable;
>
> -       WARN_ON(!initial && offset + size > block_group->zone_capacity);
> +       guard(spinlock)(&block_group->lock);

What's this guard thing and why do we use it only here? We don't use
it anywhere else in btrfs' code base.
A quick search in the Documentation directory of the kernel finds
nothing there.
In the fs/ directory there are only two users of it.

Why not the usual spin_lock(&block_group->lock) call?

>
> +       initial = ((size == block_group->length) && (block_group->alloc_offset == 0));
> +       WARN_ON(!initial && offset + size > block_group->zone_capacity);
>         if (!initial)
>                 bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold);
>
> -       spin_lock(&ctl->tree_lock);
>         if (!used)
>                 to_free = size;
>         else if (initial)
> @@ -2718,7 +2719,9 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
>                 to_free = offset + size - block_group->alloc_offset;
>         to_unusable = size - to_free;
>
> -       ctl->free_space += to_free;
> +       scoped_guard(spinlock, &ctl->tree_lock) {
> +               ctl->free_space += to_free;
> +       }

Can you clarify (in the changelog) why the locking is so unusual here,
using these guard() things? Do we really need it, and what do we gain
from it?

>         /*
>          * If the block group is read-only, we should account freed space into
>          * bytes_readonly.
> @@ -2727,15 +2730,13 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
>                 block_group->zone_unusable += to_unusable;
>                 WARN_ON(block_group->zone_unusable > block_group->length);
>         }
> -       spin_unlock(&ctl->tree_lock);
>         if (!used) {
> -               spin_lock(&block_group->lock);
>                 block_group->alloc_offset -= size;
> -               spin_unlock(&block_group->lock);
>         }
>
>         reclaimable_unusable = block_group->zone_unusable -
>                                (block_group->length - block_group->zone_capacity);
> +

Stray new line, unrelated to the change.

Thanks.


Josef Bacik July 29, 2024, 2:41 p.m. UTC | #2
On Mon, Jul 29, 2024 at 12:46:48PM +0100, Filipe Manana wrote:
> On Mon, Jul 29, 2024 at 9:33 AM Naohiro Aota <naohiro.aota@wdc.com> wrote:
> >
> > [...]
> >
> > -       WARN_ON(!initial && offset + size > block_group->zone_capacity);
> > +       guard(spinlock)(&block_group->lock);
> 
> What's this guard thing and why do we use it only here? We don't use
> it anywhere else in btrfs' code base.
> A quick search in the Documentation directory of the kernel finds
> nothing there.
> In the fs/ directory there are only two users of it.
> 

It's relatively new; it's like the C++ guards.  If you look in the VFS, we've
started using it pretty heavily there.

But it does need to be documented; if you look at include/linux/cleanup.h, it
has documentation about it.

> Why not the usual spin_lock(&block_group->lock) call?

Because this is nice for error handling.  Here it doesn't look as helpful, but
look at d842379313a2 ("fs: use guard for namespace_sem in statmount()") for an
example of how much it cleans up the error paths.

FWIW one of the tasks I have for one of our new people is to come through and
utilize some of this new infrastructure to clean up our error paths, so while it
doesn't exist yet inside btrfs, I hope it gets used pretty liberally.  Thanks,

Josef
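
For reference, here is a minimal sketch of what guard() changes on an
error path. This is not btrfs code: demo_obj and both functions are
hypothetical, and the sketch only assumes guard() from
include/linux/cleanup.h and the spinlock guard defined in
include/linux/spinlock.h.

#include <linux/cleanup.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/types.h>

/* Hypothetical object, used only for this sketch. */
struct demo_obj {
	spinlock_t lock;
	bool ready;
	u64 max_key;
};

/* Manual locking: every return path must unlock, typically via a
 * shared exit label. */
static int demo_lookup_manual(struct demo_obj *obj, u64 key)
{
	int ret = 0;

	spin_lock(&obj->lock);
	if (!obj->ready) {
		ret = -EAGAIN;
		goto out;
	}
	if (key > obj->max_key)
		ret = -ENOENT;
out:
	spin_unlock(&obj->lock);
	return ret;
}

/* guard(): the unlock runs automatically when the hidden guard
 * variable goes out of scope, so early returns need no goto. */
static int demo_lookup_guard(struct demo_obj *obj, u64 key)
{
	guard(spinlock)(&obj->lock);

	if (!obj->ready)
		return -EAGAIN;
	if (key > obj->max_key)
		return -ENOENT;
	return 0;
}

The scoped_guard(spinlock, &ctl->tree_lock) { ... } form used in the
patch is the block-scoped variant: the lock is taken on entering the
braced block and released on any path that leaves it.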
Filipe Manana July 29, 2024, 2:48 p.m. UTC | #3
On Mon, Jul 29, 2024 at 3:41 PM Josef Bacik <josef@toxicpanda.com> wrote:
>
> On Mon, Jul 29, 2024 at 12:46:48PM +0100, Filipe Manana wrote:
> > On Mon, Jul 29, 2024 at 9:33 AM Naohiro Aota <naohiro.aota@wdc.com> wrote:
> > >
> > > [...]
> > >
> > > -       WARN_ON(!initial && offset + size > block_group->zone_capacity);
> > > +       guard(spinlock)(&block_group->lock);
> >
> > What's this guard thing and why do we use it only here? We don't use
> > it anywhere else in btrfs' code base.
> > A quick search in the Documentation directory of the kernel finds
> > nothing there.
> > In the fs/ directory there are only two users of it.
> >
>
> > It's relatively new; it's like the C++ guards.  If you look in the VFS, we've
> > started using it pretty heavily there.
> >
> > But it does need to be documented; if you look at include/linux/cleanup.h, it
> > has documentation about it.
>
> > Why not the usual spin_lock(&block_group->lock) call?
>
> Because this is nice for error handling.  Here it doesn't look as helpful, but
> look at d842379313a2 ("fs: use guard for namespace_sem in statmount()") for an
> example of how much it cleans up the error paths.
>
> FWIW one of the tasks I have for one of our new people is to come through and
> > utilize some of this new infrastructure to clean up our error paths, so while it
> doesn't exist yet inside btrfs, I hope it gets used pretty liberally.  Thanks,

I see; I figured it was something to avoid repeating unlock calls.

But rather than fixing a bug and doing the migration in a single
patch, I'd rather have 1 patch to fix the bug and another to do the
migration afterwards.

Thanks.

Josef Bacik July 29, 2024, 2:58 p.m. UTC | #4
On Mon, Jul 29, 2024 at 03:48:39PM +0100, Filipe Manana wrote:
> On Mon, Jul 29, 2024 at 3:41 PM Josef Bacik <josef@toxicpanda.com> wrote:
> >
> > On Mon, Jul 29, 2024 at 12:46:48PM +0100, Filipe Manana wrote:
> > > On Mon, Jul 29, 2024 at 9:33 AM Naohiro Aota <naohiro.aota@wdc.com> wrote:
> > > >
> > > > __btrfs_add_free_space_zoned() references and modifies BG's alloc_offset,
> > > > ro, and zone_unusable, but without taking the lock. It is mostly safe
> > > > because they monotonically increase (at least for now) and this function is
> > > > mostly called by a transaction commit, which is serialized by itself.
> > > >
> > > > Still, taking the lock is a safer and correct option and I'm going to add a
> > > > change to reset zone_unusable while a block group is still alive. So, add
> > > > locking around the operations.
> > > >
> > > > Fixes: 169e0da91a21 ("btrfs: zoned: track unusable bytes for zones")
> > > > CC: stable@vger.kernel.org # 5.15+
> > > > Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
> > > > ---
> > > >  fs/btrfs/free-space-cache.c | 15 ++++++++-------
> > > >  1 file changed, 8 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> > > > index f5996a43db24..51263d6dac36 100644
> > > > --- a/fs/btrfs/free-space-cache.c
> > > > +++ b/fs/btrfs/free-space-cache.c
> > > > @@ -2697,15 +2697,16 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
> > > >         u64 offset = bytenr - block_group->start;
> > > >         u64 to_free, to_unusable;
> > > >         int bg_reclaim_threshold = 0;
> > > > -       bool initial = ((size == block_group->length) && (block_group->alloc_offset == 0));
> > > > +       bool initial;
> > > >         u64 reclaimable_unusable;
> > > >
> > > > -       WARN_ON(!initial && offset + size > block_group->zone_capacity);
> > > > +       guard(spinlock)(&block_group->lock);
> > >
> > > What's this guard thing and why do we use it only here? We don't use
> > > it anywhere else in btrfs' code base.
> > > A quick search in the Documentation directory of the kernel and I
> > > can't find anything there.
> > > In the fs/ directory there's only two users of it.
> > >
> >
> > It's relatively new, it's like the C++ guards.  If you look in the VFS we've
> > started using it pretty heavily there.
> >
> > But it does need to be documented, if you look at include/linux/cleanup.h it has
> > documentation about it.
> >
> > > Why not the usual spin_lock(&block_group->lock) call?
> >
> > Because this is nice for error handling.  Here it doesn't look as helpful, but
> > look at d842379313a2 ("fs: use guard for namespace_sem in statmount()") for an
> > example of how much it cleans up the error paths.
> >
> > FWIW one of the tasks I have for one of our new people is to come through and
> > utilize some of this new infrastructure to cleanup our error paths, so while it
> > doesn't exist yet inside btrfs, I hope it gets used pretty liberally.  Thanks,
> 
> I see; I figured it was something to avoid repeating unlock calls.
>
> But rather than fixing a bug and doing the migration in a single
> patch, I'd rather have 1 patch to fix the bug and another to do the
> migration afterwards.
> 

Fair, agreed,

Josef
Naohiro Aota July 31, 2024, 3:03 a.m. UTC | #5
On Mon, Jul 29, 2024 at 10:58:08AM GMT, Josef Bacik wrote:
> On Mon, Jul 29, 2024 at 03:48:39PM +0100, Filipe Manana wrote:
> > On Mon, Jul 29, 2024 at 3:41 PM Josef Bacik <josef@toxicpanda.com> wrote:
> > >
> > > On Mon, Jul 29, 2024 at 12:46:48PM +0100, Filipe Manana wrote:
> > > > On Mon, Jul 29, 2024 at 9:33 AM Naohiro Aota <naohiro.aota@wdc.com> wrote:
> > > > >
> > > > > __btrfs_add_free_space_zoned() references and modifies BG's alloc_offset,
> > > > > ro, and zone_unusable, but without taking the lock. It is mostly safe
> > > > > because they monotonically increase (at least for now) and this function is
> > > > > mostly called by a transaction commit, which is serialized by itself.
> > > > >
> > > > > Still, taking the lock is a safer and correct option and I'm going to add a
> > > > > change to reset zone_unusable while a block group is still alive. So, add
> > > > > locking around the operations.
> > > > >
> > > > > Fixes: 169e0da91a21 ("btrfs: zoned: track unusable bytes for zones")
> > > > > CC: stable@vger.kernel.org # 5.15+
> > > > > Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
> > > > > ---
> > > > >  fs/btrfs/free-space-cache.c | 15 ++++++++-------
> > > > >  1 file changed, 8 insertions(+), 7 deletions(-)
> > > > >
> > > > > diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> > > > > index f5996a43db24..51263d6dac36 100644
> > > > > --- a/fs/btrfs/free-space-cache.c
> > > > > +++ b/fs/btrfs/free-space-cache.c
> > > > > @@ -2697,15 +2697,16 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
> > > > >         u64 offset = bytenr - block_group->start;
> > > > >         u64 to_free, to_unusable;
> > > > >         int bg_reclaim_threshold = 0;
> > > > > -       bool initial = ((size == block_group->length) && (block_group->alloc_offset == 0));
> > > > > +       bool initial;
> > > > >         u64 reclaimable_unusable;
> > > > >
> > > > > -       WARN_ON(!initial && offset + size > block_group->zone_capacity);
> > > > > +       guard(spinlock)(&block_group->lock);
> > > >
> > > > What's this guard thing and why do we use it only here? We don't use
> > > > it anywhere else in btrfs' code base.
> > > > A quick search in the Documentation directory of the kernel and I
> > > > can't find anything there.
> > > > In the fs/ directory there's only two users of it.
> > > >
> > >
> > > It's relatively new, it's like the C++ guards.  If you look in the VFS we've
> > > started using it pretty heavily there.
> > >
> > > But it does need to be documented, if you look at include/linux/cleanup.h it has
> > > documentation about it.
> > >
> > > > Why not the usual spin_lock(&block_group->lock) call?
> > >
> > > Because this is nice for error handling.  Here it doesn't look as helpful, but
> > > look at d842379313a2 ("fs: use guard for namespace_sem in statmount()") for an
> > > example of how much it cleans up the error paths.
> > >
> > > FWIW one of the tasks I have for one of our new people is to come through and
> > > utilize some of this new infrastructure to cleanup our error paths, so while it
> > > doesn't exist yet inside btrfs, I hope it gets used pretty liberally.  Thanks,
> > 
> > I see; I figured it was something to avoid repeating unlock calls.
> >
> > But rather than fixing a bug and doing the migration in a single
> > patch, I'd rather have 1 patch to fix the bug and another to do the
> > migration afterwards.
> > 
> 
> Fair, agreed,

Josef, thank you for your follow-up. Yeah, I'd like to use it for
better error handling and to avoid forgetting to release locks.

I also agree to have this bug fix without the guard(). It would be
better for the backporting. I'll rebase and rewrite this patch, and will
start using guard() in new code. Eventually, I'll try writing
conversion patches, maybe per-file.

David Sterba July 31, 2024, 4:07 p.m. UTC | #6
On Mon, Jul 29, 2024 at 10:41:50AM -0400, Josef Bacik wrote:
> On Mon, Jul 29, 2024 at 12:46:48PM +0100, Filipe Manana wrote:
> > On Mon, Jul 29, 2024 at 9:33 AM Naohiro Aota <naohiro.aota@wdc.com> wrote:
> > What's this guard thing and why do we use it only here? We don't use
> > it anywhere else in btrfs' code base.
> > A quick search in the Documentation directory of the kernel finds
> > nothing there.
> > In the fs/ directory there are only two users of it.
> > 
> 
> It's relatively new; it's like the C++ guards.  If you look in the VFS, we've
> started using it pretty heavily there.
>
> But it does need to be documented; if you look at include/linux/cleanup.h, it
> has documentation about it.
> 
> > Why not the usual spin_lock(&block_group->lock) call?
> 
> Because this is nice for error handling.  Here it doesn't look as helpful, but
> look at d842379313a2 ("fs: use guard for namespace_sem in statmount()") for an
> example of how much it cleans up the error paths.

I don't find this commit a good example of where guard improves things.
The code is factored into a helper do_statmount() and the guard is used as

	scoped_guard(rwsem_read, &lock)
		do_statmount();

With the rwsem this would be

	down_read(&lock);
	do_statmount();
	up_read(&lock);

> FWIW one of the tasks I have for one of our new people is to come through and
> utilize some of this new infrastructure to clean up our error paths, so while it
> doesn't exist yet inside btrfs, I hope it gets used pretty liberally.  Thanks,

In some cases the guard() can improve the error handling, namely when
there are several branches and each of them ends with an unlock. But I
think this is rare.

The scoped_guard() can be useful more often, but it adds an indentation
level and obscures that it's locking.

It can also make things worse when the scope is unnecessarily long,
covering function calls that don't need the protection but can take time
and keep the lock held, like kfree(), "put structure" or similar cleanup
work.
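
As a concrete illustration of that last point, here is a hedged sketch
(again with a hypothetical demo_obj and hypothetical function names) of
an unnecessarily wide scope versus a minimal one:

#include <linux/cleanup.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct demo_obj {
	spinlock_t lock;
	void *data;
};

/* Wide scope: kfree() needs no protection, but the guard keeps
 * obj->lock held across it, lengthening the critical section. */
static void demo_replace_wide(struct demo_obj *obj, void *new_data)
{
	scoped_guard(spinlock, &obj->lock) {
		void *old = obj->data;

		obj->data = new_data;
		kfree(old);		/* still under obj->lock */
	}
}

/* Minimal scope: only the pointer swap is under the lock; the
 * cleanup runs after the guard has released it. */
static void demo_replace_narrow(struct demo_obj *obj, void *new_data)
{
	void *old;

	scoped_guard(spinlock, &obj->lock) {
		old = obj->data;
		obj->data = new_data;
	}
	kfree(old);
}

Both versions behave the same; the narrow one just holds the lock for
less time.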

Patch

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f5996a43db24..51263d6dac36 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2697,15 +2697,16 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
 	u64 offset = bytenr - block_group->start;
 	u64 to_free, to_unusable;
 	int bg_reclaim_threshold = 0;
-	bool initial = ((size == block_group->length) && (block_group->alloc_offset == 0));
+	bool initial;
 	u64 reclaimable_unusable;
 
-	WARN_ON(!initial && offset + size > block_group->zone_capacity);
+	guard(spinlock)(&block_group->lock);
 
+	initial = ((size == block_group->length) && (block_group->alloc_offset == 0));
+	WARN_ON(!initial && offset + size > block_group->zone_capacity);
 	if (!initial)
 		bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold);
 
-	spin_lock(&ctl->tree_lock);
 	if (!used)
 		to_free = size;
 	else if (initial)
@@ -2718,7 +2719,9 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
 		to_free = offset + size - block_group->alloc_offset;
 	to_unusable = size - to_free;
 
-	ctl->free_space += to_free;
+	scoped_guard(spinlock, &ctl->tree_lock) {
+		ctl->free_space += to_free;
+	}
 	/*
 	 * If the block group is read-only, we should account freed space into
 	 * bytes_readonly.
@@ -2727,15 +2730,13 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
 		block_group->zone_unusable += to_unusable;
 		WARN_ON(block_group->zone_unusable > block_group->length);
 	}
-	spin_unlock(&ctl->tree_lock);
 	if (!used) {
-		spin_lock(&block_group->lock);
 		block_group->alloc_offset -= size;
-		spin_unlock(&block_group->lock);
 	}
 
 	reclaimable_unusable = block_group->zone_unusable -
 			       (block_group->length - block_group->zone_capacity);
+
 	/* All the region is now unusable. Mark it as unused and reclaim */
 	if (block_group->zone_unusable == block_group->length) {
 		btrfs_mark_bg_unused(block_group);