diff mbox series

[v4,1/2] btrfs: zoned: reserve relocation block-group on mount

Message ID 20240523-zoned-gc-v4-1-23ed9f61afa0@kernel.org (mailing list archive)
State New
Headers show
Series btrfs: zoned: always set aside a zone for relocation | expand

Commit Message

Johannes Thumshirn May 23, 2024, 3:21 p.m. UTC
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>

Reserve one zone as a data relocation target on each mount. If we already
find one empty block group, there's no need to force a chunk allocation,
but we can use this empty data block group as our relocation target.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/block-group.c | 17 +++++++++++++
 fs/btrfs/disk-io.c     |  2 ++
 fs/btrfs/zoned.c       | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/zoned.h       |  3 +++
 4 files changed, 89 insertions(+)

Comments

Naohiro Aota May 24, 2024, 8:31 a.m. UTC | #1
On Thu, May 23, 2024 at 05:21:58PM GMT, Johannes Thumshirn wrote:
> From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> 
> Reserve one zone as a data relocation target on each mount. If we already
> find one empty block group, there's no need to force a chunk allocation,
> but we can use this empty data block group as our relocation target.
> 
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>  fs/btrfs/block-group.c | 17 +++++++++++++
>  fs/btrfs/disk-io.c     |  2 ++
>  fs/btrfs/zoned.c       | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/btrfs/zoned.h       |  3 +++
>  4 files changed, 89 insertions(+)
> 
> diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
> index 9910bae89966..1195f6721c90 100644
> --- a/fs/btrfs/block-group.c
> +++ b/fs/btrfs/block-group.c
> @@ -1500,6 +1500,15 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
>  			btrfs_put_block_group(block_group);
>  			continue;
>  		}
> +
> +		spin_lock(&fs_info->relocation_bg_lock);
> +		if (block_group->start == fs_info->data_reloc_bg) {
> +			btrfs_put_block_group(block_group);
> +			spin_unlock(&fs_info->relocation_bg_lock);
> +			continue;
> +		}
> +		spin_unlock(&fs_info->relocation_bg_lock);
> +
>  		spin_unlock(&fs_info->unused_bgs_lock);
>  
>  		btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
> @@ -1835,6 +1844,14 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
>  				      bg_list);
>  		list_del_init(&bg->bg_list);
>  
> +		spin_lock(&fs_info->relocation_bg_lock);
> +		if (bg->start == fs_info->data_reloc_bg) {
> +			btrfs_put_block_group(bg);
> +			spin_unlock(&fs_info->relocation_bg_lock);
> +			continue;
> +		}
> +		spin_unlock(&fs_info->relocation_bg_lock);
> +
>  		space_info = bg->space_info;
>  		spin_unlock(&fs_info->unused_bgs_lock);
>  
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 78d3966232ae..16bb52bcb69e 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3547,6 +3547,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
>  	}
>  	btrfs_discard_resume(fs_info);
>  
> +	btrfs_reserve_relocation_bg(fs_info);
> +
>  	if (fs_info->uuid_root &&
>  	    (btrfs_test_opt(fs_info, RESCAN_UUID_TREE) ||
>  	     fs_info->generation != btrfs_super_uuid_tree_generation(disk_super))) {
> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> index c52a0063f7db..d291cf4f565e 100644
> --- a/fs/btrfs/zoned.c
> +++ b/fs/btrfs/zoned.c
> @@ -17,6 +17,7 @@
>  #include "fs.h"
>  #include "accessors.h"
>  #include "bio.h"
> +#include "transaction.h"
>  
>  /* Maximum number of zones to report per blkdev_report_zones() call */
>  #define BTRFS_REPORT_NR_ZONES   4096
> @@ -2637,3 +2638,69 @@ void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info)
>  	}
>  	spin_unlock(&fs_info->zone_active_bgs_lock);
>  }
> +
> +static u64 find_empty_block_group(struct btrfs_space_info *sinfo, u64 flags)
> +{
> +	struct btrfs_block_group *bg;
> +
> +	for (int i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
> +		list_for_each_entry(bg, &sinfo->block_groups[i], list) {
> +			if (bg->flags != flags)
> +				continue;
> +			if (bg->used == 0)
> +				return bg->start;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +void btrfs_reserve_relocation_bg(struct btrfs_fs_info *fs_info)
> +{
> +	struct btrfs_root *tree_root = fs_info->tree_root;
> +	struct btrfs_space_info *sinfo = fs_info->data_sinfo;
> +	struct btrfs_trans_handle *trans;
> +	struct btrfs_block_group *bg;
> +	u64 flags = btrfs_get_alloc_profile(fs_info, sinfo->flags);
> +	u64 bytenr = 0;
> +
> +	lockdep_assert_not_held(&fs_info->relocation_bg_lock);
> +
> +	if (!btrfs_is_zoned(fs_info))
> +		return;
> +
> +	if (fs_info->data_reloc_bg)
> +		return;
> +
> +	bytenr = find_empty_block_group(sinfo, flags);
> +	if (!bytenr) {
> +		int ret;
> +
> +		trans = btrfs_join_transaction(tree_root);
> +		if (IS_ERR(trans))
> +			return;
> +
> +		ret = btrfs_chunk_alloc(trans, flags, CHUNK_ALLOC_FORCE);
> +		btrfs_end_transaction(trans);
> +		if (ret)
> +			return;
> +
> +		bytenr = find_empty_block_group(sinfo, flags);
> +		if (!bytenr)
> +			return;
> +
> +	}
> +
> +	bg = btrfs_lookup_block_group(fs_info, bytenr);
> +	if (!bg)
> +		return;
> +
> +	if (!btrfs_zone_activate(bg))
> +		bytenr = 0;
> +
> +	btrfs_put_block_group(bg);
> +
> +	spin_lock(&fs_info->relocation_bg_lock);
> +	fs_info->data_reloc_bg = bytenr;

Since the above check and the chunk allocation are outside of the lock, some
other thread can already set new data_reloc_bg in the meantime. So, setting
this under "if (!fs_info->data_reloc_bg)" would be safe.

With that:

Reviewed-by: Naohiro Aota <naohiro.aota@wdc.com>

Regards,
Filipe Manana May 24, 2024, 2:07 p.m. UTC | #2
On Thu, May 23, 2024 at 4:32 PM Johannes Thumshirn <jth@kernel.org> wrote:
>
> From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
>
> Reserve one zone as a data relocation target on each mount. If we already
> find one empty block group, there's no need to force a chunk allocation,
> but we can use this empty data block group as our relocation target.
>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>  fs/btrfs/block-group.c | 17 +++++++++++++
>  fs/btrfs/disk-io.c     |  2 ++
>  fs/btrfs/zoned.c       | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/btrfs/zoned.h       |  3 +++
>  4 files changed, 89 insertions(+)
>
> diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
> index 9910bae89966..1195f6721c90 100644
> --- a/fs/btrfs/block-group.c
> +++ b/fs/btrfs/block-group.c
> @@ -1500,6 +1500,15 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
>                         btrfs_put_block_group(block_group);
>                         continue;
>                 }
> +
> +               spin_lock(&fs_info->relocation_bg_lock);
> +               if (block_group->start == fs_info->data_reloc_bg) {
> +                       btrfs_put_block_group(block_group);
> +                       spin_unlock(&fs_info->relocation_bg_lock);
> +                       continue;
> +               }
> +               spin_unlock(&fs_info->relocation_bg_lock);
> +
>                 spin_unlock(&fs_info->unused_bgs_lock);
>
>                 btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
> @@ -1835,6 +1844,14 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
>                                       bg_list);
>                 list_del_init(&bg->bg_list);
>
> +               spin_lock(&fs_info->relocation_bg_lock);
> +               if (bg->start == fs_info->data_reloc_bg) {
> +                       btrfs_put_block_group(bg);
> +                       spin_unlock(&fs_info->relocation_bg_lock);
> +                       continue;
> +               }
> +               spin_unlock(&fs_info->relocation_bg_lock);

Ok, so the reclaim task and cleaner kthread will not remove the
reserved block group.

But there's nothing preventing someone running balance manually, which
will delete the block group.

E.g. block group X is empty and reserved as the data relocation bg.
The balance ioctl is invoked, it goes through all block groups for relocation.
It happens that it first finds bg X. Deletes bg X.

Now there's no more reserved bg for data relocation, and other tasks
can come in and use the freed space and fill all of it or most of it.

Shouldn't we prevent the data reloc bg from being a target of a manual
relocation too?
E.g. have btrfs_relocate_chunk() do nothing if the bg is the data reloc bg.

Thanks.

> +
>                 space_info = bg->space_info;
>                 spin_unlock(&fs_info->unused_bgs_lock);
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 78d3966232ae..16bb52bcb69e 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3547,6 +3547,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
>         }
>         btrfs_discard_resume(fs_info);
>
> +       btrfs_reserve_relocation_bg(fs_info);
> +
>         if (fs_info->uuid_root &&
>             (btrfs_test_opt(fs_info, RESCAN_UUID_TREE) ||
>              fs_info->generation != btrfs_super_uuid_tree_generation(disk_super))) {
> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> index c52a0063f7db..d291cf4f565e 100644
> --- a/fs/btrfs/zoned.c
> +++ b/fs/btrfs/zoned.c
> @@ -17,6 +17,7 @@
>  #include "fs.h"
>  #include "accessors.h"
>  #include "bio.h"
> +#include "transaction.h"
>
>  /* Maximum number of zones to report per blkdev_report_zones() call */
>  #define BTRFS_REPORT_NR_ZONES   4096
> @@ -2637,3 +2638,69 @@ void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info)
>         }
>         spin_unlock(&fs_info->zone_active_bgs_lock);
>  }
> +
> +static u64 find_empty_block_group(struct btrfs_space_info *sinfo, u64 flags)
> +{
> +       struct btrfs_block_group *bg;
> +
> +       for (int i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
> +               list_for_each_entry(bg, &sinfo->block_groups[i], list) {
> +                       if (bg->flags != flags)
> +                               continue;
> +                       if (bg->used == 0)
> +                               return bg->start;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
> +void btrfs_reserve_relocation_bg(struct btrfs_fs_info *fs_info)
> +{
> +       struct btrfs_root *tree_root = fs_info->tree_root;
> +       struct btrfs_space_info *sinfo = fs_info->data_sinfo;
> +       struct btrfs_trans_handle *trans;
> +       struct btrfs_block_group *bg;
> +       u64 flags = btrfs_get_alloc_profile(fs_info, sinfo->flags);
> +       u64 bytenr = 0;
> +
> +       lockdep_assert_not_held(&fs_info->relocation_bg_lock);
> +
> +       if (!btrfs_is_zoned(fs_info))
> +               return;
> +
> +       if (fs_info->data_reloc_bg)
> +               return;
> +
> +       bytenr = find_empty_block_group(sinfo, flags);
> +       if (!bytenr) {
> +               int ret;
> +
> +               trans = btrfs_join_transaction(tree_root);
> +               if (IS_ERR(trans))
> +                       return;
> +
> +               ret = btrfs_chunk_alloc(trans, flags, CHUNK_ALLOC_FORCE);
> +               btrfs_end_transaction(trans);
> +               if (ret)
> +                       return;
> +
> +               bytenr = find_empty_block_group(sinfo, flags);
> +               if (!bytenr)
> +                       return;
> +
> +       }
> +
> +       bg = btrfs_lookup_block_group(fs_info, bytenr);
> +       if (!bg)
> +               return;
> +
> +       if (!btrfs_zone_activate(bg))
> +               bytenr = 0;
> +
> +       btrfs_put_block_group(bg);
> +
> +       spin_lock(&fs_info->relocation_bg_lock);
> +       fs_info->data_reloc_bg = bytenr;
> +       spin_unlock(&fs_info->relocation_bg_lock);
> +}
> diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
> index ff605beb84ef..56c1c19d52bc 100644
> --- a/fs/btrfs/zoned.h
> +++ b/fs/btrfs/zoned.h
> @@ -95,6 +95,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
>  int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
>                                 struct btrfs_space_info *space_info, bool do_finish);
>  void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info);
> +void btrfs_reserve_relocation_bg(struct btrfs_fs_info *fs_info);
>  #else /* CONFIG_BLK_DEV_ZONED */
>
>  static inline int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info)
> @@ -264,6 +265,8 @@ static inline int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
>
>  static inline void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info) { }
>
> +static inline void btrfs_reserve_relocation_bg(struct btrfs_fs_info *fs_info) { }
> +
>  #endif
>
>  static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
>
> --
> 2.43.0
>
>
diff mbox series

Patch

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 9910bae89966..1195f6721c90 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1500,6 +1500,15 @@  void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 			btrfs_put_block_group(block_group);
 			continue;
 		}
+
+		spin_lock(&fs_info->relocation_bg_lock);
+		if (block_group->start == fs_info->data_reloc_bg) {
+			btrfs_put_block_group(block_group);
+			spin_unlock(&fs_info->relocation_bg_lock);
+			continue;
+		}
+		spin_unlock(&fs_info->relocation_bg_lock);
+
 		spin_unlock(&fs_info->unused_bgs_lock);
 
 		btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
@@ -1835,6 +1844,14 @@  void btrfs_reclaim_bgs_work(struct work_struct *work)
 				      bg_list);
 		list_del_init(&bg->bg_list);
 
+		spin_lock(&fs_info->relocation_bg_lock);
+		if (bg->start == fs_info->data_reloc_bg) {
+			btrfs_put_block_group(bg);
+			spin_unlock(&fs_info->relocation_bg_lock);
+			continue;
+		}
+		spin_unlock(&fs_info->relocation_bg_lock);
+
 		space_info = bg->space_info;
 		spin_unlock(&fs_info->unused_bgs_lock);
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 78d3966232ae..16bb52bcb69e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3547,6 +3547,8 @@  int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 	}
 	btrfs_discard_resume(fs_info);
 
+	btrfs_reserve_relocation_bg(fs_info);
+
 	if (fs_info->uuid_root &&
 	    (btrfs_test_opt(fs_info, RESCAN_UUID_TREE) ||
 	     fs_info->generation != btrfs_super_uuid_tree_generation(disk_super))) {
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index c52a0063f7db..d291cf4f565e 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -17,6 +17,7 @@ 
 #include "fs.h"
 #include "accessors.h"
 #include "bio.h"
+#include "transaction.h"
 
 /* Maximum number of zones to report per blkdev_report_zones() call */
 #define BTRFS_REPORT_NR_ZONES   4096
@@ -2637,3 +2638,69 @@  void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info)
 	}
 	spin_unlock(&fs_info->zone_active_bgs_lock);
 }
+
+static u64 find_empty_block_group(struct btrfs_space_info *sinfo, u64 flags)
+{
+	struct btrfs_block_group *bg;
+
+	for (int i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
+		list_for_each_entry(bg, &sinfo->block_groups[i], list) {
+			if (bg->flags != flags)
+				continue;
+			if (bg->used == 0)
+				return bg->start;
+		}
+	}
+
+	return 0;
+}
+
+void btrfs_reserve_relocation_bg(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_root *tree_root = fs_info->tree_root;
+	struct btrfs_space_info *sinfo = fs_info->data_sinfo;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_block_group *bg;
+	u64 flags = btrfs_get_alloc_profile(fs_info, sinfo->flags);
+	u64 bytenr = 0;
+
+	lockdep_assert_not_held(&fs_info->relocation_bg_lock);
+
+	if (!btrfs_is_zoned(fs_info))
+		return;
+
+	if (fs_info->data_reloc_bg)
+		return;
+
+	bytenr = find_empty_block_group(sinfo, flags);
+	if (!bytenr) {
+		int ret;
+
+		trans = btrfs_join_transaction(tree_root);
+		if (IS_ERR(trans))
+			return;
+
+		ret = btrfs_chunk_alloc(trans, flags, CHUNK_ALLOC_FORCE);
+		btrfs_end_transaction(trans);
+		if (ret)
+			return;
+
+		bytenr = find_empty_block_group(sinfo, flags);
+		if (!bytenr)
+			return;
+
+	}
+
+	bg = btrfs_lookup_block_group(fs_info, bytenr);
+	if (!bg)
+		return;
+
+	if (!btrfs_zone_activate(bg))
+		bytenr = 0;
+
+	btrfs_put_block_group(bg);
+
+	spin_lock(&fs_info->relocation_bg_lock);
+	fs_info->data_reloc_bg = bytenr;
+	spin_unlock(&fs_info->relocation_bg_lock);
+}
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index ff605beb84ef..56c1c19d52bc 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -95,6 +95,7 @@  int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
 int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
 				struct btrfs_space_info *space_info, bool do_finish);
 void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info);
+void btrfs_reserve_relocation_bg(struct btrfs_fs_info *fs_info);
 #else /* CONFIG_BLK_DEV_ZONED */
 
 static inline int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info)
@@ -264,6 +265,8 @@  static inline int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
 
 static inline void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info) { }
 
+static inline void btrfs_reserve_relocation_bg(struct btrfs_fs_info *fs_info) { }
+
 #endif
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)