diff mbox series

[v2,02/12] btrfs: introduce a new helper to submit bio for scrub

Message ID 27af1ebdc7a7048895be3eaccd3fb437337e1830.1678777941.git.wqu@suse.com (mailing list archive)
State New, archived
Headers show
Series btrfs: scrub: use a more reader friendly code to implement scrub_simple_mirror() | expand

Commit Message

Qu Wenruo March 14, 2023, 7:34 a.m. UTC
The new helper, btrfs_submit_scrub_read(), would be mostly a subset of
btrfs_submit_bio(), with the following limitations:

- Only supports read
- @mirror_num must be > 0
- No read-time repair nor checksum verification
- The @bbio must not cross stripe boundary

This would provide the basis for unified read repair for scrub, as we no
longer needs to handle RAID56 recovery all by scrub, and RAID56 data
stripes scrub can share the same code of read and repair.

The repair part would be the same as non-RAID56, as we only need to try
the next mirror.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/bio.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++----
 fs/btrfs/bio.h | 19 +++++++++++++++++--
 2 files changed, 63 insertions(+), 6 deletions(-)

Comments

Christoph Hellwig March 15, 2023, 7:54 a.m. UTC | #1
> +	 * @inode can be NULL for callers who don't want any advanced features
> +	 * like read-time repair.
> +	 * In that case, @fs_info must be properly initialized.
> +	 */
>  	struct btrfs_inode *inode;
> -	u64 file_offset;
> +
> +	union {
> +		/* If @inode is initialized. */
> +		u64 file_offset;
> +
> +		/* If @inode is NULL. */
> +		struct btrfs_fs_info *fs_info;
> +	};

Ugg.  Let's not go into too crazy unions here that make the code
alsmost impossible to use correctly.  If we want to get away without
the inode we just need an extra unconditional fs_info field, which
is what most users of ->inode actually use.  I can take care of that.
Qu Wenruo March 15, 2023, 8:04 a.m. UTC | #2
On 2023/3/15 15:54, Christoph Hellwig wrote:
>> +	 * @inode can be NULL for callers who don't want any advanced features
>> +	 * like read-time repair.
>> +	 * In that case, @fs_info must be properly initialized.
>> +	 */
>>   	struct btrfs_inode *inode;
>> -	u64 file_offset;
>> +
>> +	union {
>> +		/* If @inode is initialized. */
>> +		u64 file_offset;
>> +
>> +		/* If @inode is NULL. */
>> +		struct btrfs_fs_info *fs_info;
>> +	};
> 
> Ugg.  Let's not go into too crazy unions here that make the code
> alsmost impossible to use correctly.  If we want to get away without
> the inode we just need an extra unconditional fs_info field, which
> is what most users of ->inode actually use.  I can take care of that.

If the memory usage is not a big deal, then I'm totally fine with a 
dedicated fs_info member.

Thanks,
Qu
Christoph Hellwig March 15, 2023, 8:07 a.m. UTC | #3
On Wed, Mar 15, 2023 at 04:04:19PM +0800, Qu Wenruo wrote:
> > Ugg.  Let's not go into too crazy unions here that make the code
> > alsmost impossible to use correctly.  If we want to get away without
> > the inode we just need an extra unconditional fs_info field, which
> > is what most users of ->inode actually use.  I can take care of that.
> 
> If the memory usage is not a big deal, then I'm totally fine with a
> dedicated fs_info member.

According to pahole we still have a fair amount of space in the
btrfs_bio before increasing the allocation.  While I don't want to use
it lightly, this seems like a good use case.

With a little more work I might be able to move the inode into a
union leg only used for data writes as well.
Qu Wenruo March 15, 2023, 8:12 a.m. UTC | #4
On 2023/3/15 16:07, Christoph Hellwig wrote:
> On Wed, Mar 15, 2023 at 04:04:19PM +0800, Qu Wenruo wrote:
>>> Ugg.  Let's not go into too crazy unions here that make the code
>>> alsmost impossible to use correctly.  If we want to get away without
>>> the inode we just need an extra unconditional fs_info field, which
>>> is what most users of ->inode actually use.  I can take care of that.
>>
>> If the memory usage is not a big deal, then I'm totally fine with a
>> dedicated fs_info member.
> 
> According to pahole we still have a fair amount of space in the
> btrfs_bio before increasing the allocation.  While I don't want to use
> it lightly, this seems like a good use case.

Sounds pretty reasonable.

So that's would be the way to go.

Thanks,
Qu

> 
> With a little more work I might be able to move the inode into a
> union leg only used for data writes as well.
diff mbox series

Patch

diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 726592868e9c..279bee2015d6 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -305,8 +305,8 @@  static void btrfs_end_bio_work(struct work_struct *work)
 {
 	struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
 
-	/* Metadata reads are checked and repaired by the submitter. */
-	if (bbio->bio.bi_opf & REQ_META)
+	/* Metadata or scrub reads are checked and repaired by the submitter. */
+	if (bbio->bio.bi_opf & REQ_META || !bbio->inode)
 		bbio->end_io(bbio);
 	else
 		btrfs_check_read_bio(bbio, bbio->bio.bi_private);
@@ -316,7 +316,8 @@  static void btrfs_simple_end_io(struct bio *bio)
 {
 	struct btrfs_bio *bbio = btrfs_bio(bio);
 	struct btrfs_device *dev = bio->bi_private;
-	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
+	struct btrfs_fs_info *fs_info = bbio->inode ?
+		bbio->inode->root->fs_info : bbio->fs_info;
 
 	btrfs_bio_counter_dec(fs_info);
 
@@ -340,7 +341,8 @@  static void btrfs_raid56_end_io(struct bio *bio)
 
 	btrfs_bio_counter_dec(bioc->fs_info);
 	bbio->mirror_num = bioc->mirror_num;
-	if (bio_op(bio) == REQ_OP_READ && !(bbio->bio.bi_opf & REQ_META))
+	if (bio_op(bio) == REQ_OP_READ && bbio->inode &&
+	    !(bbio->bio.bi_opf & REQ_META))
 		btrfs_check_read_bio(bbio, NULL);
 	else
 		btrfs_orig_bbio_end_io(bbio);
@@ -686,6 +688,46 @@  static bool btrfs_submit_chunk(struct bio *bio, int mirror_num)
 	return true;
 }
 
+/*
+ * Scrub read special version, with extra limits:
+ *
+ * - Only support read for scrub usage
+ * - @mirror_num must be >0
+ * - No read-time repair nor checksum verification.
+ * - The @bbio must not cross stripe boundary.
+ */
+void btrfs_submit_scrub_read(struct btrfs_fs_info *fs_info,
+			     struct btrfs_bio *bbio, int mirror_num)
+{
+	struct btrfs_bio *orig_bbio = bbio;
+	u64 logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
+	u64 length = bbio->bio.bi_iter.bi_size;
+	u64 map_length = length;
+	struct btrfs_io_context *bioc = NULL;
+	struct btrfs_io_stripe smap;
+	int ret;
+
+	ASSERT(mirror_num > 0);
+	ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_READ);
+	ASSERT(!bbio->inode);
+
+	bbio->fs_info = fs_info;
+	btrfs_bio_counter_inc_blocked(fs_info);
+	ret = __btrfs_map_block(fs_info, btrfs_op(&bbio->bio), logical,
+				&map_length, &bioc, &smap, &mirror_num, 1);
+	if (ret)
+		goto fail;
+
+	/* Caller should ensure the @bbio doesn't cross stripe boundary. */
+	ASSERT(map_length >= length);
+	__btrfs_submit_bio(&bbio->bio, bioc, &smap, mirror_num);
+	return;
+
+fail:
+	btrfs_bio_counter_dec(fs_info);
+	btrfs_bio_end_io(orig_bbio, ret);
+}
+
 void btrfs_submit_bio(struct bio *bio, int mirror_num)
 {
 	while (!btrfs_submit_chunk(bio, mirror_num))
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index 873ff85817f0..cc24fbedd383 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -30,9 +30,22 @@  typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);
  * passed to btrfs_submit_bio for mapping to the physical devices.
  */
 struct btrfs_bio {
-	/* Inode and offset into it that this I/O operates on. */
+	/*
+	 * Inode and offset into it that this I/O operates on.
+	 *
+	 * @inode can be NULL for callers who don't want any advanced features
+	 * like read-time repair.
+	 * In that case, @fs_info must be properly initialized.
+	 */
 	struct btrfs_inode *inode;
-	u64 file_offset;
+
+	union {
+		/* If @inode is initialized. */
+		u64 file_offset;
+
+		/* If @inode is NULL. */
+		struct btrfs_fs_info *fs_info;
+	};
 
 	union {
 		/*
@@ -89,6 +102,8 @@  static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
 #define REQ_BTRFS_ONE_ORDERED			REQ_DRV
 
 void btrfs_submit_bio(struct bio *bio, int mirror_num);
+void btrfs_submit_scrub_read(struct btrfs_fs_info *fs_info,
+			     struct btrfs_bio *bbio, int mirror_num);
 int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
 			    u64 length, u64 logical, struct page *page,
 			    unsigned int pg_offset, int mirror_num);