diff mbox series

[5/5] btrfs: add preferred_metadata mode mount option

Message ID 20210117185435.36263-6-kreijack@libero.it (mailing list archive)
State New, archived
Headers show
Series [1/5] Add an ioctl to set the device properties | expand

Commit Message

Goffredo Baroncelli Jan. 17, 2021, 6:54 p.m. UTC
From: Goffredo Baroncelli <kreijack@inwind.it>

When this mode is enabled, the chunk allocation policy is modified to give
the disks a different precedence depending on the chunk type.
A disk may be marked with the preferred_metadata flag to give it a higher
chance of hosting metadata.

There are 4 modes:
- preferred_metadata=disabled
  The allocator is the standard one.

- preferred_metadata=soft
  Metadata chunks are allocated on the disks marked with the
  "preferred_metadata" flag.
  Data chunks are allocated on the disks not marked with the
  "preferred_metadata" flag.
  If there is not enough space on the preferred kind of disks, the other
  kind of disks may be used.

- preferred_metadata=hard
  Metadata chunks are allocated on the disks marked with the
  "preferred_metadata" flag.
  Data chunks are allocated on the disks not marked with the
  "preferred_metadata" flag.
  If there is not enough space, a "no space left" error is raised. The
  other kind of disks is never used.

- preferred_metadata=metadata
  Metadata chunks are allocated on the disks marked with the
  "preferred_metadata" flag.
  For metadata, if there is not enough space, the other kind of disks
  may be used.
  Data chunks are allocated on the disks not marked with the
  "preferred_metadata" flag.
  For data, if there is not enough space, a "no space left" error is raised.
  The other kind of disks is never used.

To mark a disk as "preferred_metadata", use the command
# btrfs properties set <disk> preferred_metadata 1

To remove the flag "preferred_metadata" from a disk, use the command
# btrfs properties set <disk> preferred_metadata 0

Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
---
 fs/btrfs/volumes.c | 105 ++++++++++++++++++++++++++++++++++++++++++++-
 fs/btrfs/volumes.h |   1 +
 2 files changed, 104 insertions(+), 2 deletions(-)

Comments

Chen, Rong A Jan. 18, 2021, 3:05 a.m. UTC | #1
Hi Goffredo,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on kdave/for-next]
[also build test WARNING on v5.11-rc3 next-20210115]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Goffredo-Baroncelli/Add-an-ioctl-to-set-the-device-properties/20210118-035442
base:   https://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git for-next
config: arm64-randconfig-s032-20210118 (attached as .config)
compiler: aarch64-linux-gcc (GCC) 9.3.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # apt-get install sparse
        # sparse version: v0.6.3-208-g46a52ca4-dirty
        # https://github.com/0day-ci/linux/commit/6e3781fa16beb22201a3dac33326357fc0a0b7b9
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Goffredo-Baroncelli/Add-an-ioctl-to-set-the-device-properties/20210118-035442
        git checkout 6e3781fa16beb22201a3dac33326357fc0a0b7b9
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=arm64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <rong.a.chen@intel.com>


"sparse warnings: (new ones prefixed by >>)"
   fs/btrfs/tests/btrfs-tests.c: note: in included file:
>> fs/btrfs/tests/../volumes.h:367:33: sparse: sparse: dubious one-bit signed bitfield
--
   fs/btrfs/tests/inode-tests.c: note: in included file:
>> fs/btrfs/tests/../volumes.h:367:33: sparse: sparse: dubious one-bit signed bitfield
--
   fs/btrfs/tests/extent-map-tests.c: note: in included file:
>> fs/btrfs/tests/../volumes.h:367:33: sparse: sparse: dubious one-bit signed bitfield

vim +367 fs/btrfs/tests/../volumes.h

cea9e4452ebaf18d Chris Mason         2008-04-09  361  
b2117a39fa96cf48 Miao Xie            2011-01-05  362  struct btrfs_device_info {
b2117a39fa96cf48 Miao Xie            2011-01-05  363  	struct btrfs_device *dev;
b2117a39fa96cf48 Miao Xie            2011-01-05  364  	u64 dev_offset;
b2117a39fa96cf48 Miao Xie            2011-01-05  365  	u64 max_avail;
73c5de0051533cbd Arne Jansen         2011-04-12  366  	u64 total_avail;
6e3781fa16beb222 Goffredo Baroncelli 2021-01-17 @367  	int preferred_metadata:1;
b2117a39fa96cf48 Miao Xie            2011-01-05  368  };
b2117a39fa96cf48 Miao Xie            2011-01-05  369  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 68b346c5465d..9dc3f2473805 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4824,6 +4824,56 @@  static int btrfs_cmp_device_info(const void *a, const void *b)
 	return 0;
 }
 
+/*
+ * sort() comparator for metadata/system chunks: preferred_metadata devices
+ * first, then descending max_avail, then descending total_avail
+ */
+static int btrfs_cmp_device_info_metadata(const void *a, const void *b)
+{
+	const struct btrfs_device_info *di_a = a;
+	const struct btrfs_device_info *di_b = b;
+
+	/* a device with the flag set sorts ahead of one without it */
+	if (di_a->preferred_metadata && !di_b->preferred_metadata)
+		return -1;
+	if (!di_a->preferred_metadata && di_b->preferred_metadata)
+		return 1;
+	if (di_a->max_avail > di_b->max_avail)
+		return -1;
+	if (di_a->max_avail < di_b->max_avail)
+		return 1;
+	if (di_a->total_avail > di_b->total_avail)
+		return -1;
+	if (di_a->total_avail < di_b->total_avail)
+		return 1;
+	return 0;
+}
+
+/*
+ * sort() comparator for data chunks: preferred_metadata devices last,
+ * then descending max_avail, then descending total_avail
+ */
+static int btrfs_cmp_device_info_data(const void *a, const void *b)
+{
+	const struct btrfs_device_info *di_a = a;
+	const struct btrfs_device_info *di_b = b;
+
+	/* a device with the flag set sorts behind one without it */
+	if (di_a->preferred_metadata && !di_b->preferred_metadata)
+		return 1;
+	if (!di_a->preferred_metadata && di_b->preferred_metadata)
+		return -1;
+	if (di_a->max_avail > di_b->max_avail)
+		return -1;
+	if (di_a->max_avail < di_b->max_avail)
+		return 1;
+	if (di_a->total_avail > di_b->total_avail)
+		return -1;
+	if (di_a->total_avail < di_b->total_avail)
+		return 1;
+	return 0;
+}
+
 static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
 {
 	if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
@@ -4939,6 +4989,7 @@  static int gather_device_info(struct btrfs_fs_devices *fs_devices,
 	int ndevs = 0;
 	u64 max_avail;
 	u64 dev_offset;
+	int nr_preferred_metadata = 0;
 
 	/*
 	 * in the first pass through the devices list, we gather information
@@ -4991,15 +5042,65 @@  static int gather_device_info(struct btrfs_fs_devices *fs_devices,
 		devices_info[ndevs].max_avail = max_avail;
 		devices_info[ndevs].total_avail = total_avail;
 		devices_info[ndevs].dev = device;
+		devices_info[ndevs].preferred_metadata = !!(device->type &
+			BTRFS_DEV_PREFERRED_METADATA);
+		if (devices_info[ndevs].preferred_metadata)
+			nr_preferred_metadata++;
 		++ndevs;
 	}
 	ctl->ndevs = ndevs;
 
+	BUG_ON(nr_preferred_metadata > ndevs);
 	/*
 	 * now sort the devices by hole size / available space
 	 */
-	sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
-	     btrfs_cmp_device_info, NULL);
+	if (((ctl->type & BTRFS_BLOCK_GROUP_DATA) &&
+	     (ctl->type & BTRFS_BLOCK_GROUP_METADATA)) ||
+	    info->preferred_metadata_mode == BTRFS_PM_DISABLED) {
+		/* mixed bg or PREFERRED_METADATA not set */
+		sort(devices_info, ctl->ndevs, sizeof(struct btrfs_device_info),
+			     btrfs_cmp_device_info, NULL);
+	} else {
+		/*
+		 * if PREFERRED_METADATA is set, sort the devices considering
+		 * also the kind (preferred_metadata or not). Limit the
+		 * available devices to the ones of the same kind, to avoid
+		 * that a striped profile, like raid5, spreads to all kinds of
+		 * devices.
+		 * It is allowed to use different kinds of devices (if the ones
+		 * of the same kind are not enough alone) in the following
+		 * case:
+		 * - preferred_metadata_mode == BTRFS_PM_SOFT:
+		 *               use the device of the same kind until these
+		 *               are enough. Otherwise it is allowed to
+		 *               use all the devices
+		 * - preferred_metadata_mode == BTRFS_PM_HARD:
+		 *               use the device of the same kind; if these are
+		 *               not enough, then an error will be raised
+		 * - preferred_metadata_mode == BTRFS_PM_METADATA
+		 *               metadata/system -> as BTRFS_PM_SOFT
+		 *               data -> as BTRFS_PM_HARD
+		 */
+		if (ctl->type & BTRFS_BLOCK_GROUP_DATA) {
+			int nr_data = ctl->ndevs - nr_preferred_metadata;
+			sort(devices_info, ctl->ndevs,
+				     sizeof(struct btrfs_device_info),
+				     btrfs_cmp_device_info_data, NULL);
+			if (info->preferred_metadata_mode == BTRFS_PM_HARD ||
+			    info->preferred_metadata_mode == BTRFS_PM_METADATA)
+				ctl->ndevs = nr_data;
+			else if (nr_data >= ctl->devs_min)
+				ctl->ndevs = nr_data;
+		} else { /* non data -> metadata and system */
+			sort(devices_info, ctl->ndevs,
+				     sizeof(struct btrfs_device_info),
+				     btrfs_cmp_device_info_metadata, NULL);
+			if (info->preferred_metadata_mode == BTRFS_PM_HARD)
+				ctl->ndevs = nr_preferred_metadata;
+			else if (nr_preferred_metadata >= ctl->devs_min)
+				ctl->ndevs = nr_preferred_metadata;
+		}
+	}
 
 	return 0;
 }
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index d776b7f55d56..fc8da51e739b 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -364,6 +364,7 @@  struct btrfs_device_info {
 	u64 dev_offset;
 	u64 max_avail;
 	u64 total_avail;
+	unsigned int preferred_metadata:1; /* BTRFS_DEV_PREFERRED_METADATA set */
 };
 
 struct btrfs_raid_attr {