diff mbox series

[v3,03/12] btrfs: introduce a new helper to submit write bio for scrub

Message ID c9482839875af328d7fe5ff6a9bebdc84c33c5ab.1679278088.git.wqu@suse.com (mailing list archive)
State New, archived
Headers show
Series btrfs: scrub: use a more reader friendly code to implement scrub_simple_mirror() | expand

Commit Message

Qu Wenruo March 20, 2023, 2:12 a.m. UTC
Just like the special scrub read, scrub write also has its extra niches:

- Only write back to single device
  Even for read-repair on RAID56, we only update the corrupted data
  stripe itself, not triggering the full RMW path.

  This makes scrub writeback a perfect match for the single stripe
  quick path.

- Requires a valid @mirror_num
  For RAID56 case, only @mirror_num == 1 is supported.
  For non-RAID56 cases, we need @mirror_num to locate our stripe.

- Need to manually specify if it's for dev-replace
  For scrub path we can write back to the original device (for
  read-repair) and to the target device (for replace) at the same
  time, but with different sectors (read-repair only writes repaired
  sectors, while dev-replace writes all good sectors).

  So here we need a bool to specify the case.

- No data csum generation

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/bio.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/bio.h |  3 ++
 2 files changed, 95 insertions(+)

Comments

David Sterba March 21, 2023, 12:14 a.m. UTC | #1
On Mon, Mar 20, 2023 at 10:12:49AM +0800, Qu Wenruo wrote:
> diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
> index b96f40160b08..633447b6ba44 100644
> --- a/fs/btrfs/bio.c
> +++ b/fs/btrfs/bio.c
> +	/* Map the RAID56 multi-stripe writes to a single one. */
> +	if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
> +		int data_stripes = bioc->map_type & BTRFS_BLOCK_GROUP_RAID5 ?
> +				   bioc->num_stripes - 1 : bioc->num_stripes - 2;

When ternary operator is used in expression, please put ( ) around it so
it's clear where it starts.

> +		int i;
> +
> +		/* This special write only works for data stripes. */
> +		ASSERT(mirror_num == 1);
> +		for (i = 0; i < data_stripes; i++) {

		for (int i = 0; ...)

We can now use the iterator value defined inside for (), it's relatively
new due to bumped minimum compiler version so I'd like to see it used
where possible.

> +			u64 stripe_start = bioc->full_stripe_logical +
> +					   (i << BTRFS_STRIPE_LEN_SHIFT);
> +
> +			if (logical >= stripe_start &&
> +			    logical < stripe_start + BTRFS_STRIPE_LEN)
> +				break;
> +		}
> +		ASSERT(i < data_stripes);
> +		smap.dev = bioc->stripes[i].dev;
> +		smap.physical = bioc->stripes[i].physical +
> +				((logical - bioc->full_stripe_logical) &
> +				 BTRFS_STRIPE_LEN_MASK);
> +		goto submit;
> +	}
> +	ASSERT(mirror_num <= bioc->num_stripes);
> +	smap.dev = bioc->stripes[mirror_num - 1].dev;
> +	smap.physical = bioc->stripes[mirror_num - 1].physical;
> +submit:
> +	ASSERT(smap.dev);
> +	btrfs_put_bioc(bioc);
> +	bioc = NULL;
> +	if (dev_replace) {
> +		ASSERT(smap.dev == fs_info->dev_replace.srcdev);
> +		smap.dev = fs_info->dev_replace.tgtdev;
> +	}
> +	__btrfs_submit_bio(&bbio->bio, bioc, &smap, mirror_num);
> +	return;
> +
> +fail:
> +	btrfs_bio_counter_dec(fs_info);
> +	btrfs_bio_end_io(orig_bbio, ret);
> +}
Qu Wenruo March 21, 2023, 12:54 a.m. UTC | #2
On 2023/3/21 08:14, David Sterba wrote:
> On Mon, Mar 20, 2023 at 10:12:49AM +0800, Qu Wenruo wrote:
>> diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
>> index b96f40160b08..633447b6ba44 100644
>> --- a/fs/btrfs/bio.c
>> +++ b/fs/btrfs/bio.c
>> +	/* Map the RAID56 multi-stripe writes to a single one. */
>> +	if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
>> +		int data_stripes = bioc->map_type & BTRFS_BLOCK_GROUP_RAID5 ?
>> +				   bioc->num_stripes - 1 : bioc->num_stripes - 2;
> 
> When ternary operator is used in expression, please put ( ) around it so
> it's clear where it starts.
> 
>> +		int i;
>> +
>> +		/* This special write only works for data stripes. */
>> +		ASSERT(mirror_num == 1);
>> +		for (i = 0; i < data_stripes; i++) {
> 
> 		for (int i = 0; ...)
> 
> We can now use the iterator value defined inside for (), it's relatively
> new due to bumped minimum compiler version so I'd like to see it used
> where possible.

Just one question.

There are quite some for loops in the last few patches.

In that case, should we still go the "for (int i = 0;...)" way?
Or it's better to declare "i" as the old way?

Thanks,
Qu

> 
>> +			u64 stripe_start = bioc->full_stripe_logical +
>> +					   (i << BTRFS_STRIPE_LEN_SHIFT);
>> +
>> +			if (logical >= stripe_start &&
>> +			    logical < stripe_start + BTRFS_STRIPE_LEN)
>> +				break;
>> +		}
>> +		ASSERT(i < data_stripes);
>> +		smap.dev = bioc->stripes[i].dev;
>> +		smap.physical = bioc->stripes[i].physical +
>> +				((logical - bioc->full_stripe_logical) &
>> +				 BTRFS_STRIPE_LEN_MASK);
>> +		goto submit;
>> +	}
>> +	ASSERT(mirror_num <= bioc->num_stripes);
>> +	smap.dev = bioc->stripes[mirror_num - 1].dev;
>> +	smap.physical = bioc->stripes[mirror_num - 1].physical;
>> +submit:
>> +	ASSERT(smap.dev);
>> +	btrfs_put_bioc(bioc);
>> +	bioc = NULL;
>> +	if (dev_replace) {
>> +		ASSERT(smap.dev == fs_info->dev_replace.srcdev);
>> +		smap.dev = fs_info->dev_replace.tgtdev;
>> +	}
>> +	__btrfs_submit_bio(&bbio->bio, bioc, &smap, mirror_num);
>> +	return;
>> +
>> +fail:
>> +	btrfs_bio_counter_dec(fs_info);
>> +	btrfs_bio_end_io(orig_bbio, ret);
>> +}
David Sterba March 21, 2023, 1:27 a.m. UTC | #3
On Tue, Mar 21, 2023 at 08:54:22AM +0800, Qu Wenruo wrote:
> 
> 
> On 2023/3/21 08:14, David Sterba wrote:
> > On Mon, Mar 20, 2023 at 10:12:49AM +0800, Qu Wenruo wrote:
> >> diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
> >> index b96f40160b08..633447b6ba44 100644
> >> --- a/fs/btrfs/bio.c
> >> +++ b/fs/btrfs/bio.c
> >> +	/* Map the RAID56 multi-stripe writes to a single one. */
> >> +	if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
> >> +		int data_stripes = bioc->map_type & BTRFS_BLOCK_GROUP_RAID5 ?
> >> +				   bioc->num_stripes - 1 : bioc->num_stripes - 2;
> > 
> > When ternary operator is used in expression, please put ( ) around it so
> > it's clear where it starts.
> > 
> >> +		int i;
> >> +
> >> +		/* This special write only works for data stripes. */
> >> +		ASSERT(mirror_num == 1);
> >> +		for (i = 0; i < data_stripes; i++) {
> > 
> > 		for (int i = 0; ...)
> > 
> > We can now use the iterator value defined inside for (), it's relatively
> > new due to bumped minimum compiler version so I'd like to see it used
> > where possible.
> 
> Just one question.
> 
> There are quite some for loops in the last few patches.
> 
> In that case, should we still go the "for (int i = 0;...)" way?
> Or it's better to declare "i" as the old way?

For 'i' I'd use the for() declarations, even if it's for multiple loops.
Qu Wenruo March 23, 2023, 8:48 a.m. UTC | #4
On 2023/3/21 08:14, David Sterba wrote:
> On Mon, Mar 20, 2023 at 10:12:49AM +0800, Qu Wenruo wrote:
>> diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
>> index b96f40160b08..633447b6ba44 100644
>> --- a/fs/btrfs/bio.c
>> +++ b/fs/btrfs/bio.c
>> +	/* Map the RAID56 multi-stripe writes to a single one. */
>> +	if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
>> +		int data_stripes = bioc->map_type & BTRFS_BLOCK_GROUP_RAID5 ?
>> +				   bioc->num_stripes - 1 : bioc->num_stripes - 2;
> 
> When ternary operator is used in expression, please put ( ) around it so
> it's clear where it starts.
> 
>> +		int i;
>> +
>> +		/* This special write only works for data stripes. */
>> +		ASSERT(mirror_num == 1);
>> +		for (i = 0; i < data_stripes; i++) {
> 
> 		for (int i = 0; ...)
> 
> We can now use the iterator value defined inside for (), it's relatively
> new due to bumped minimum compiler version so I'd like to see it used
> where possible.

Although I'll follow the new "for (int i,...)" style, this particular 
location can not be replaced, as @i would be later used to grab the 
physical and dev.

Thanks,
Qu
> 
>> +			u64 stripe_start = bioc->full_stripe_logical +
>> +					   (i << BTRFS_STRIPE_LEN_SHIFT);
>> +
>> +			if (logical >= stripe_start &&
>> +			    logical < stripe_start + BTRFS_STRIPE_LEN)
>> +				break;
>> +		}
>> +		ASSERT(i < data_stripes);
>> +		smap.dev = bioc->stripes[i].dev;
>> +		smap.physical = bioc->stripes[i].physical +
>> +				((logical - bioc->full_stripe_logical) &
>> +				 BTRFS_STRIPE_LEN_MASK);
>> +		goto submit;
>> +	}
>> +	ASSERT(mirror_num <= bioc->num_stripes);
>> +	smap.dev = bioc->stripes[mirror_num - 1].dev;
>> +	smap.physical = bioc->stripes[mirror_num - 1].physical;
>> +submit:
>> +	ASSERT(smap.dev);
>> +	btrfs_put_bioc(bioc);
>> +	bioc = NULL;
>> +	if (dev_replace) {
>> +		ASSERT(smap.dev == fs_info->dev_replace.srcdev);
>> +		smap.dev = fs_info->dev_replace.tgtdev;
>> +	}
>> +	__btrfs_submit_bio(&bbio->bio, bioc, &smap, mirror_num);
>> +	return;
>> +
>> +fail:
>> +	btrfs_bio_counter_dec(fs_info);
>> +	btrfs_bio_end_io(orig_bbio, ret);
>> +}
diff mbox series

Patch

diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index b96f40160b08..633447b6ba44 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -733,6 +733,98 @@  void btrfs_submit_scrub_read(struct btrfs_fs_info *fs_info,
 	btrfs_bio_end_io(orig_bbio, ret);
 }
 
+/*
+ * Scrub write special version. Some extra limits:
+ *
+ * - Only support write back for dev-replace and read-repair.
+ *   This means, the write bio, even for RAID56, would only
+ *   be mapped to single device.
+ *
+ * - @mirror_num must be >0.
+ *   To indicate which mirror to be written.
+ *   If it's RAID56, it must be 1 (data stripes).
+ *
+ * - The @bbio must not cross stripe boundary.
+ *
+ * - If @dev_replace is true, the resulted stripe must be mapped to
+ *   replace source device.
+ *
+ * - No csum geneartion.
+ */
+void btrfs_submit_scrub_write(struct btrfs_fs_info *fs_info,
+			      struct btrfs_bio *bbio, int mirror_num,
+			      bool dev_replace)
+{
+	struct btrfs_bio *orig_bbio = bbio;
+	u64 logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
+	u64 length = bbio->bio.bi_iter.bi_size;
+	u64 map_length = length;
+	struct btrfs_io_context *bioc = NULL;
+	struct btrfs_io_stripe smap;
+	int ret;
+
+	ASSERT(mirror_num > 0);
+	ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE);
+	ASSERT(!bbio->inode);
+
+	bbio->fs_info = fs_info;
+	btrfs_bio_counter_inc_blocked(fs_info);
+	ret = __btrfs_map_block(fs_info, btrfs_op(&bbio->bio), logical,
+				&map_length, &bioc, &smap, &mirror_num, 1);
+	if (ret)
+		goto fail;
+
+	/* Caller should ensure the @bbio doesn't cross stripe boundary. */
+	ASSERT(map_length >= length);
+	if (btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE && btrfs_is_zoned(fs_info)) {
+		bbio->bio.bi_opf &= ~REQ_OP_WRITE;
+		bbio->bio.bi_opf |= REQ_OP_ZONE_APPEND;
+	}
+
+	if (!bioc)
+		goto submit;
+	/* Map the RAID56 multi-stripe writes to a single one. */
+	if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+		int data_stripes = bioc->map_type & BTRFS_BLOCK_GROUP_RAID5 ?
+				   bioc->num_stripes - 1 : bioc->num_stripes - 2;
+		int i;
+
+		/* This special write only works for data stripes. */
+		ASSERT(mirror_num == 1);
+		for (i = 0; i < data_stripes; i++) {
+			u64 stripe_start = bioc->full_stripe_logical +
+					   (i << BTRFS_STRIPE_LEN_SHIFT);
+
+			if (logical >= stripe_start &&
+			    logical < stripe_start + BTRFS_STRIPE_LEN)
+				break;
+		}
+		ASSERT(i < data_stripes);
+		smap.dev = bioc->stripes[i].dev;
+		smap.physical = bioc->stripes[i].physical +
+				((logical - bioc->full_stripe_logical) &
+				 BTRFS_STRIPE_LEN_MASK);
+		goto submit;
+	}
+	ASSERT(mirror_num <= bioc->num_stripes);
+	smap.dev = bioc->stripes[mirror_num - 1].dev;
+	smap.physical = bioc->stripes[mirror_num - 1].physical;
+submit:
+	ASSERT(smap.dev);
+	btrfs_put_bioc(bioc);
+	bioc = NULL;
+	if (dev_replace) {
+		ASSERT(smap.dev == fs_info->dev_replace.srcdev);
+		smap.dev = fs_info->dev_replace.tgtdev;
+	}
+	__btrfs_submit_bio(&bbio->bio, bioc, &smap, mirror_num);
+	return;
+
+fail:
+	btrfs_bio_counter_dec(fs_info);
+	btrfs_bio_end_io(orig_bbio, ret);
+}
+
 void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num)
 {
 	while (!btrfs_submit_chunk(bbio, mirror_num))
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index 073df13365e4..d5b4a15dde35 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -104,6 +104,9 @@  static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
 void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num);
 void btrfs_submit_scrub_read(struct btrfs_fs_info *fs_info,
 			     struct btrfs_bio *bbio, int mirror_num);
+void btrfs_submit_scrub_write(struct btrfs_fs_info *fs_info,
+			      struct btrfs_bio *bbio, int mirror_num,
+			      bool dev_replace);
 int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
 			    u64 length, u64 logical, struct page *page,
 			    unsigned int pg_offset, int mirror_num);