diff mbox series

[2/2] btrfs: detect multi-dev stripe and disable automatic inline checksum

Message ID 7ce85c808b96be3c8352ffa03fedbaacf0dc6d27.1705568050.git.naohiro.aota@wdc.com (mailing list archive)
State New, archived
Headers show
Series btrfs: disable inline checksum for multi-dev striped FS | expand

Commit Message

Naohiro Aota Jan. 18, 2024, 8:54 a.m. UTC
Currently, we disable offloading checksum to workqueues if the checksum
algorithm implementation is fast. However, it may not be fast enough for
multiple device striping, as reported in the links.

For example, below is a result running the following fio benchmark on 6
devices RAID0 (both data and metadata) btrfs.

fio --group_reporting --rw=write --fallocate=none \
     --direct=0 --ioengine=libaio --iodepth=32 --end_fsync=1 \
     --filesize=200G bs=$((64 * 6))k \
     --time_based --runtime=300s \
     --directory=/mnt --name=writer --numjobs=6

with this patch (offloading checksum to workques)
    WRITE: bw=2084MiB/s (2185MB/s), 2084MiB/s-2084MiB/s (2185MB/s-2185MB/s), io=750GiB (806GB), run=368752-368752msec
without this patch (inline checksum)
    WRITE: bw=1447MiB/s (1517MB/s), 1447MiB/s-1447MiB/s (1517MB/s-1517MB/s), io=517GiB (555GB), run=366017-366017msec

So, it is better to disable inline checksum when there is a block group
stripe-writing to several devices (RAID0/10/5/6). For now, I simply set the
threshold to 2, so if there are more than 2 devices, it disables the inline
checksum.

For reference, here is a result on 1 device RAID0 setup. No degradation
introduced as expected.

with this patch:
    WRITE: bw=445MiB/s (467MB/s), 445MiB/s-445MiB/s (467MB/s-467MB/s), io=302GiB (324GB), run=694199-694199msec
without this patch
    WRITE: bw=441MiB/s (462MB/s), 441MiB/s-441MiB/s (462MB/s-462MB/s), io=300GiB (322GB), run=696125-696125msec

Link: https://lore.kernel.org/linux-btrfs/20230731152223.4EFB.409509F4@e16-tech.com/
Link: https://lore.kernel.org/linux-btrfs/p3vo3g7pqn664mhmdhlotu5dzcna6vjtcoc2hb2lsgo2fwct7k@xzaxclba5tae/
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 fs/btrfs/bio.c     |  8 ++++++--
 fs/btrfs/volumes.c | 20 ++++++++++++++++++++
 fs/btrfs/volumes.h |  2 ++
 3 files changed, 28 insertions(+), 2 deletions(-)

Comments

Johannes Thumshirn Jan. 19, 2024, 3:29 p.m. UTC | #1
On 18.01.24 09:55, Naohiro Aota wrote:
> +static void check_striped_block_group(struct btrfs_fs_info *info, u64 type, int num_stripes)
> +{
> +	if (btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max != 0 ||
> +	    num_stripes <= BTRFS_INLINE_CSUM_MAX_DEVS)
> +		return;
> +
> +	/*
> +	 * Found a block group writing to multiple devices, disable
> +	 * inline automatic checksum.
> +	 */
> +	info->fs_devices->striped_writing = true;
> +}
> +

This function adds some overly long lines.
Naohiro Aota Jan. 22, 2024, 8:02 a.m. UTC | #2
On Fri, Jan 19, 2024 at 03:29:13PM +0000, Johannes Thumshirn wrote:
> On 18.01.24 09:55, Naohiro Aota wrote:
> > +static void check_striped_block_group(struct btrfs_fs_info *info, u64 type, int num_stripes)
> > +{
> > +	if (btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max != 0 ||
> > +	    num_stripes <= BTRFS_INLINE_CSUM_MAX_DEVS)
> > +		return;
> > +
> > +	/*
> > +	 * Found a block group writing to multiple devices, disable
> > +	 * inline automatic checksum.
> > +	 */
> > +	info->fs_devices->striped_writing = true;
> > +}
> > +
> 
> This function adds some overly long lines.

Oh, the checkpatch didn't gave an error. But, I'll move "num_stripes" in
the next version, well, if it's still needed.
David Sterba Jan. 22, 2024, 9:11 p.m. UTC | #3
On Fri, Jan 19, 2024 at 03:29:13PM +0000, Johannes Thumshirn wrote:
> On 18.01.24 09:55, Naohiro Aota wrote:
> > +static void check_striped_block_group(struct btrfs_fs_info *info, u64 type, int num_stripes)
> > +{
> > +	if (btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max != 0 ||
> > +	    num_stripes <= BTRFS_INLINE_CSUM_MAX_DEVS)
> > +		return;
> > +
> > +	/*
> > +	 * Found a block group writing to multiple devices, disable
> > +	 * inline automatic checksum.
> > +	 */
> > +	info->fs_devices->striped_writing = true;
> > +}
> > +
> 
> This function adds some overly long lines.

The line length limits are not strict and checkpatch has been updated
from 80 to 100, so it's up to our taste. For the prototypes it's around
85-ish to be ok.
diff mbox series

Patch

diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 222ee52a3af1..dfdba72ea0da 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -614,9 +614,13 @@  static bool should_async_write(struct btrfs_bio *bbio)
 	if (fs_devices->inline_csum_mode == BTRFS_INLINE_CSUM_FORCE_ON)
 		return false;
 
-	/* Submit synchronously if the checksum implementation is fast. */
+	/*
+	 * Submit synchronously if the checksum implementation is
+	 * fast, and it is not backed by multiple devices striping.
+	 */
 	if (fs_devices->inline_csum_mode == BTRFS_INLINE_CSUM_AUTO &&
-	    test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
+	    test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags) &&
+	    !fs_devices->striped_writing)
 		return false;
 
 	/*
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d67785be2c77..79c1af049e9e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -41,6 +41,12 @@ 
 					 BTRFS_BLOCK_GROUP_RAID10 | \
 					 BTRFS_BLOCK_GROUP_RAID56_MASK)
 
+/*
+ * Maximum number of devices automatically enabling inline checksum for
+ * a striped write.
+ */
+#define BTRFS_INLINE_CSUM_MAX_DEVS 2
+
 struct btrfs_io_geometry {
 	u32 stripe_index;
 	u32 stripe_nr;
@@ -5124,6 +5130,19 @@  static void check_raid1c34_incompat_flag(struct btrfs_fs_info *info, u64 type)
 	btrfs_set_fs_incompat(info, RAID1C34);
 }
 
+static void check_striped_block_group(struct btrfs_fs_info *info, u64 type, int num_stripes)
+{
+	if (btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max != 0 ||
+	    num_stripes <= BTRFS_INLINE_CSUM_MAX_DEVS)
+		return;
+
+	/*
+	 * Found a block group writing to multiple devices, disable
+	 * inline automatic checksum.
+	 */
+	info->fs_devices->striped_writing = true;
+}
+
 /*
  * Structure used internally for btrfs_create_chunk() function.
  * Wraps needed parameters.
@@ -5592,6 +5611,7 @@  static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans,
 
 	check_raid56_incompat_flag(info, type);
 	check_raid1c34_incompat_flag(info, type);
+	check_striped_block_group(info, type, map->num_stripes);
 
 	return block_group;
 }
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index f21cfe268be9..c32501985c64 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -384,6 +384,8 @@  struct btrfs_fs_devices {
 	bool seeding;
 	/* The mount needs to use a randomly generated fsid. */
 	bool temp_fsid;
+	/* Has a block group writing stripes to multiple devices. (RAID0/10/5/6). */
+	bool striped_writing;
 
 	struct btrfs_fs_info *fs_info;
 	/* sysfs kobjects */