diff mbox series

[4/6] btrfs: periodic block_group reclaim

Message ID 1173e535ec7b46bda33ed2dc4219027502763902.1706914865.git.boris@bur.io (mailing list archive)
State New, archived
Headers show
Series btrfs: dynamic and periodic block_group reclaim | expand

Commit Message

Boris Burkov Feb. 2, 2024, 11:12 p.m. UTC
We currently employ a edge-triggered block group reclaim strategy which
marks block groups for reclaim as they free down past a threshold.

With a dynamic threshold, this is worse than doing it in a
level-triggered fashion periodically. That is because the reclaim
itself happens periodically, so the threshold at that point in time is
what really matters, not the threshold at freeing time. If we mark the
reclaim in a big pass, then sort by usage and do reclaim, we also
benefit from a negative feedback loop preventing unnecessary reclaims as
we crunch through the "best" candidates.

Since this is quite a different model, it requires some additional
support. The edge triggered reclaim has a good heuristic for not
reclaiming fresh block groups, so we need to replace that with a typical
GC sweep mark which skips block groups that have seen an allocation
since the last sweep.

Signed-off-by: Boris Burkov <boris@bur.io>
---
 fs/btrfs/block-group.c |  2 ++
 fs/btrfs/block-group.h |  1 +
 fs/btrfs/space-info.c  | 51 ++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/space-info.h  |  7 ++++++
 fs/btrfs/sysfs.c       | 34 ++++++++++++++++++++++++++++
 5 files changed, 95 insertions(+)

Comments

kernel test robot Feb. 4, 2024, 6:19 p.m. UTC | #1
Hi Boris,

kernel test robot noticed the following build warnings:

[auto build test WARNING on kdave/for-next]
[also build test WARNING on linus/master v6.8-rc2 next-20240202]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Boris-Burkov/btrfs-report-reclaim-count-in-sysfs/20240203-071516
base:   https://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git for-next
patch link:    https://lore.kernel.org/r/1173e535ec7b46bda33ed2dc4219027502763902.1706914865.git.boris%40bur.io
patch subject: [PATCH 4/6] btrfs: periodic block_group reclaim
config: i386-randconfig-141-20240204 (https://download.01.org/0day-ci/archive/20240205/202402050244.pa6hJds3-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202402050244.pa6hJds3-lkp@intel.com/

smatch warnings:
fs/btrfs/space-info.c:1996 btrfs_reclaim_sweep() error: uninitialized symbol 'ret'.

vim +/ret +1996 fs/btrfs/space-info.c

  1977	
  1978	int btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info)
  1979	{
  1980		int ret;
  1981		int raid;
  1982		struct btrfs_space_info *space_info;
  1983	
  1984		list_for_each_entry(space_info, &fs_info->space_info, list) {
  1985			if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
  1986				continue;
  1987			if (!READ_ONCE(space_info->periodic_reclaim))
  1988				continue;
  1989			for (raid = 0; raid < BTRFS_NR_RAID_TYPES; raid++) {
  1990				ret = do_reclaim_sweep(fs_info, space_info, raid);
  1991				if (ret)
  1992					return ret;
  1993			}
  1994		}
  1995	
> 1996		return ret;
diff mbox series

Patch

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 6244c76f3584..1a752a8a1bea 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1898,6 +1898,7 @@  void btrfs_reclaim_bgs_work(struct work_struct *work)
 
 void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
 {
+	btrfs_reclaim_sweep(fs_info);
 	spin_lock(&fs_info->unused_bgs_lock);
 	if (!list_empty(&fs_info->reclaim_bgs))
 		queue_work(system_unbound_wq, &fs_info->reclaim_bgs_work);
@@ -3565,6 +3566,7 @@  int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 		old_val += num_bytes;
 		cache->used = old_val;
 		cache->reserved -= num_bytes;
+		cache->reclaim_mark = 0;
 		space_info->bytes_reserved -= num_bytes;
 		space_info->bytes_used += num_bytes;
 		space_info->disk_used += num_bytes * factor;
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index c4a1f01cc1c2..24b576b7a88c 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -250,6 +250,7 @@  struct btrfs_block_group {
 	struct work_struct zone_finish_work;
 	struct extent_buffer *last_eb;
 	enum btrfs_block_group_size_class size_class;
+	u64 reclaim_mark;
 };
 
 static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 86a87501af08..fc4e307669ef 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -1944,3 +1944,54 @@  int btrfs_calc_reclaim_threshold(struct btrfs_space_info *space_info)
 		return calc_dynamic_reclaim_threshold(space_info);
 	return READ_ONCE(space_info->bg_reclaim_threshold);
 }
+
+static int do_reclaim_sweep(struct btrfs_fs_info *fs_info,
+			    struct btrfs_space_info *space_info, int raid)
+{
+	struct btrfs_block_group *bg;
+	int thresh_pct;
+
+	spin_lock(&space_info->lock);
+	thresh_pct = btrfs_calc_reclaim_threshold(space_info);
+	spin_unlock(&space_info->lock);
+
+	down_read(&space_info->groups_sem);
+	list_for_each_entry(bg, &space_info->block_groups[raid], list) {
+		u64 thresh;
+		bool reclaim = false;
+
+		btrfs_get_block_group(bg);
+		spin_lock(&bg->lock);
+		thresh = mult_perc(bg->length, thresh_pct);
+		if (bg->used < thresh && bg->reclaim_mark)
+			reclaim = true;
+		bg->reclaim_mark++;
+		spin_unlock(&bg->lock);
+		if (reclaim)
+			btrfs_mark_bg_to_reclaim(bg);
+		btrfs_put_block_group(bg);
+	}
+	up_read(&space_info->groups_sem);
+	return 0;
+}
+
+int btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info)
+{
+	int ret;
+	int raid;
+	struct btrfs_space_info *space_info;
+
+	list_for_each_entry(space_info, &fs_info->space_info, list) {
+		if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
+			continue;
+		if (!READ_ONCE(space_info->periodic_reclaim))
+			continue;
+		for (raid = 0; raid < BTRFS_NR_RAID_TYPES; raid++) {
+			ret = do_reclaim_sweep(fs_info, space_info, raid);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return ret;
+}
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index 2f4c00525a08..2917bc4247db 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -169,6 +169,12 @@  struct btrfs_space_info {
 	 * fixed bg_reclaim_threshold.
 	 */
 	bool dynamic_reclaim;
+
+	/*
+	 * Periodically check all block groups against the reclaim
+	 * threshold in the cleaner thread.
+	 */
+	bool periodic_reclaim;
 };
 
 struct reserve_ticket {
@@ -252,5 +258,6 @@  void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info);
 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
 
 int btrfs_calc_reclaim_threshold(struct btrfs_space_info *space_info);
+int btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info);
 
 #endif /* BTRFS_SPACE_INFO_H */
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 0683a23e5254..98bd8efaa2dc 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -971,6 +971,39 @@  BTRFS_ATTR_RW(space_info, dynamic_reclaim,
 	      btrfs_sinfo_dynamic_reclaim_show,
 	      btrfs_sinfo_dynamic_reclaim_store);
 
+static ssize_t btrfs_sinfo_periodic_reclaim_show(struct kobject *kobj,
+						struct kobj_attribute *a,
+						char *buf)
+{
+	struct btrfs_space_info *space_info = to_space_info(kobj);
+
+	return sysfs_emit(buf, "%d\n", READ_ONCE(space_info->periodic_reclaim));
+}
+
+static ssize_t btrfs_sinfo_periodic_reclaim_store(struct kobject *kobj,
+						 struct kobj_attribute *a,
+						 const char *buf, size_t len)
+{
+	struct btrfs_space_info *space_info = to_space_info(kobj);
+	int periodic_reclaim;
+	int ret;
+
+	ret = kstrtoint(buf, 10, &periodic_reclaim);
+	if (ret)
+		return ret;
+
+	if (periodic_reclaim < 0)
+		return -EINVAL;
+
+	WRITE_ONCE(space_info->periodic_reclaim, periodic_reclaim != 0);
+
+	return len;
+}
+
+BTRFS_ATTR_RW(space_info, periodic_reclaim,
+	      btrfs_sinfo_periodic_reclaim_show,
+	      btrfs_sinfo_periodic_reclaim_store);
+
 /*
  * Allocation information about block group types.
  *
@@ -992,6 +1025,7 @@  static struct attribute *space_info_attrs[] = {
 	BTRFS_ATTR_PTR(space_info, chunk_size),
 	BTRFS_ATTR_PTR(space_info, size_classes),
 	BTRFS_ATTR_PTR(space_info, reclaim_count),
+	BTRFS_ATTR_PTR(space_info, periodic_reclaim),
 #ifdef CONFIG_BTRFS_DEBUG
 	BTRFS_ATTR_PTR(space_info, force_chunk_alloc),
 #endif