diff mbox series

[03/34] btrfs: add a btrfs_inode pointer to struct btrfs_bio

Message ID 20230121065031.1139353-4-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series [01/34] block: export bio_split_rw | expand

Commit Message

Christoph Hellwig Jan. 21, 2023, 6:50 a.m. UTC
All btrfs_bio I/Os are associated with an inode.  Add a pointer to that
inode, which will allow to simplify a lot of calling conventions, and
which will be needed in the I/O completion path in the future.

This grow the btrfs_bio struture by a pointer, but that grows will
be offset by the removal of the device pointer soon.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/bio.c         | 8 ++++++--
 fs/btrfs/bio.h         | 5 ++++-
 fs/btrfs/compression.c | 3 ++-
 fs/btrfs/extent_io.c   | 8 ++++----
 fs/btrfs/inode.c       | 4 +++-
 5 files changed, 19 insertions(+), 9 deletions(-)

Comments

Anand Jain Jan. 22, 2023, 10:20 a.m. UTC | #1
On 21/01/2023 14:50, Christoph Hellwig wrote:
> All btrfs_bio I/Os are associated with an inode.  Add a pointer to that
> inode, which will allow to simplify a lot of calling conventions, and
> which will be needed in the I/O completion path in the future.
> 
> This grow the btrfs_bio struture by a pointer, but that grows will
Nit: s/struture/structure

> be offset by the removal of the device pointer soon.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Anand Jain <anand.jain@oracle.com>
Johannes Thumshirn Jan. 23, 2023, 3:26 p.m. UTC | #2
On 21.01.23 07:50, Christoph Hellwig wrote:
> This grow the btrfs_bio struture by a pointer, but that grows will

s/struture/structure

Otherwise looks good,
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Johannes Thumshirn Jan. 23, 2023, 3:47 p.m. UTC | #3
Looks good,
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Qu Wenruo March 7, 2023, 1:44 a.m. UTC | #4
On 2023/1/21 14:50, Christoph Hellwig wrote:
> All btrfs_bio I/Os are associated with an inode.  Add a pointer to that
> inode, which will allow to simplify a lot of calling conventions, and
> which will be needed in the I/O completion path in the future.
> 
> This grow the btrfs_bio struture by a pointer, but that grows will
> be offset by the removal of the device pointer soon.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

With my recent restart on scrub rework, this patch makes me wonder, what 
if scrub wants to use btrfs_bio, but don't want to pass a valid 
btrfs_inode pointer?

E.g. scrub code just wants to read certain mirror of a logical bytenr.
This can simplify the handling of RAID56, as for data stripes the repair 
path is the same, just try the next mirror(s).

Furthermore most of the new btrfs_bio code is handling data reads by 
triggering read-repair automatically.
This can be unnecessary for scrub.

For now, I can workaround the behavior by setting REQ_META and pass
btree_inode as the inode, but this is only a workaround.
This can be problematic especially if we want to merge metadata and data 
verification behavior.

Any better ideas on this?


And since we're here, can we also have btrfs equivalent of on-stack bio?
As scrub can benefit a lot from that, as for sector-by-sector read, we 
want to avoid repeating allocating/freeing a btrfs_bio just for reading 
one sector.
(The existing behavior is using on-stack bio with bio_init/bio_uninit 
inside scrub_recheck_block())

Thanks,
Qu
> ---
>   fs/btrfs/bio.c         | 8 ++++++--
>   fs/btrfs/bio.h         | 5 ++++-
>   fs/btrfs/compression.c | 3 ++-
>   fs/btrfs/extent_io.c   | 8 ++++----
>   fs/btrfs/inode.c       | 4 +++-
>   5 files changed, 19 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
> index 8affc88b0e0a4b..2398bb263957b2 100644
> --- a/fs/btrfs/bio.c
> +++ b/fs/btrfs/bio.c
> @@ -22,9 +22,11 @@ static struct bio_set btrfs_bioset;
>    * is already initialized by the block layer.
>    */
>   static inline void btrfs_bio_init(struct btrfs_bio *bbio,
> +				  struct btrfs_inode *inode,
>   				  btrfs_bio_end_io_t end_io, void *private)
>   {
>   	memset(bbio, 0, offsetof(struct btrfs_bio, bio));
> +	bbio->inode = inode;
>   	bbio->end_io = end_io;
>   	bbio->private = private;
>   }
> @@ -37,16 +39,18 @@ static inline void btrfs_bio_init(struct btrfs_bio *bbio,
>    * a mempool.
>    */
>   struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
> +			    struct btrfs_inode *inode,
>   			    btrfs_bio_end_io_t end_io, void *private)
>   {
>   	struct bio *bio;
>   
>   	bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS, &btrfs_bioset);
> -	btrfs_bio_init(btrfs_bio(bio), end_io, private);
> +	btrfs_bio_init(btrfs_bio(bio), inode, end_io, private);
>   	return bio;
>   }
>   
>   struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
> +				    struct btrfs_inode *inode,
>   				    btrfs_bio_end_io_t end_io, void *private)
>   {
>   	struct bio *bio;
> @@ -56,7 +60,7 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
>   
>   	bio = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOFS, &btrfs_bioset);
>   	bbio = btrfs_bio(bio);
> -	btrfs_bio_init(bbio, end_io, private);
> +	btrfs_bio_init(bbio, inode, end_io, private);
>   
>   	bio_trim(bio, offset >> 9, size >> 9);
>   	bbio->iter = bio->bi_iter;
> diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
> index baaa27961cc812..8d69d0b226d99b 100644
> --- a/fs/btrfs/bio.h
> +++ b/fs/btrfs/bio.h
> @@ -41,7 +41,8 @@ struct btrfs_bio {
>   	unsigned int is_metadata:1;
>   	struct bvec_iter iter;
>   
> -	/* File offset that this I/O operates on. */
> +	/* Inode and offset into it that this I/O operates on. */
> +	struct btrfs_inode *inode;
>   	u64 file_offset;
>   
>   	/* @device is for stripe IO submission. */
> @@ -80,8 +81,10 @@ int __init btrfs_bioset_init(void);
>   void __cold btrfs_bioset_exit(void);
>   
>   struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
> +			    struct btrfs_inode *inode,
>   			    btrfs_bio_end_io_t end_io, void *private);
>   struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
> +				    struct btrfs_inode *inode,
>   				    btrfs_bio_end_io_t end_io, void *private);
>   
>   
> diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
> index 4a5aeb8dd4793a..b8e3e899974b34 100644
> --- a/fs/btrfs/compression.c
> +++ b/fs/btrfs/compression.c
> @@ -344,7 +344,8 @@ static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_byte
>   	struct bio *bio;
>   	int ret;
>   
> -	bio = btrfs_bio_alloc(BIO_MAX_VECS, opf, endio_func, cb);
> +	bio = btrfs_bio_alloc(BIO_MAX_VECS, opf, BTRFS_I(cb->inode), endio_func,
> +			      cb);
>   	bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
>   
>   	em = btrfs_get_chunk_map(fs_info, disk_bytenr, fs_info->sectorsize);
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 9bd32daa9b9a6f..faf9312a46c0e1 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -740,7 +740,8 @@ int btrfs_repair_one_sector(struct btrfs_inode *inode, struct btrfs_bio *failed_
>   		return -EIO;
>   	}
>   
> -	repair_bio = btrfs_bio_alloc(1, REQ_OP_READ, failed_bbio->end_io,
> +	repair_bio = btrfs_bio_alloc(1, REQ_OP_READ, failed_bbio->inode,
> +				     failed_bbio->end_io,
>   				     failed_bbio->private);
>   	repair_bbio = btrfs_bio(repair_bio);
>   	repair_bbio->file_offset = start;
> @@ -1394,9 +1395,8 @@ static int alloc_new_bio(struct btrfs_inode *inode,
>   	struct bio *bio;
>   	int ret;
>   
> -	ASSERT(bio_ctrl->end_io_func);
> -
> -	bio = btrfs_bio_alloc(BIO_MAX_VECS, opf, bio_ctrl->end_io_func, NULL);
> +	bio = btrfs_bio_alloc(BIO_MAX_VECS, opf, inode, bio_ctrl->end_io_func,
> +			      NULL);
>   	/*
>   	 * For compressed page range, its disk_bytenr is always @disk_bytenr
>   	 * passed in, no matter if we have added any range into previous bio.
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 3c49742f0d4556..0a85e42f114cc5 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -8097,7 +8097,8 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
>   		 * the allocation is backed by btrfs_bioset.
>   		 */
>   		bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len,
> -					      btrfs_end_dio_bio, dip);
> +					      BTRFS_I(inode), btrfs_end_dio_bio,
> +					      dip);
>   		btrfs_bio(bio)->file_offset = file_offset;
>   
>   		if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
> @@ -10409,6 +10410,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
>   
>   			if (!bio) {
>   				bio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ,
> +						      inode,
>   						      btrfs_encoded_read_endio,
>   						      &priv);
>   				bio->bi_iter.bi_sector =
Christoph Hellwig March 7, 2023, 2:41 p.m. UTC | #5
On Tue, Mar 07, 2023 at 09:44:32AM +0800, Qu Wenruo wrote:
> With my recent restart on scrub rework, this patch makes me wonder, what if 
> scrub wants to use btrfs_bio, but don't want to pass a valid btrfs_inode 
> pointer?

The full inode is only really needed for the data repair code.  But a lot
of code uses the fs_info, which would have to be added as a separate
counter.  The other usage is the sync_writers counter, which is a bit
odd and should probably be keyed off the REQ_SYNC flag instead.

> E.g. scrub code just wants to read certain mirror of a logical bytenr.
> This can simplify the handling of RAID56, as for data stripes the repair 
> path is the same, just try the next mirror(s).
>
> Furthermore most of the new btrfs_bio code is handling data reads by 
> triggering read-repair automatically.
> This can be unnecessary for scrub.

This sounds like you don't want to use the btrfs_bio at all as you
don't rely on any of the functionality from it.

>
> And since we're here, can we also have btrfs equivalent of on-stack bio?
> As scrub can benefit a lot from that, as for sector-by-sector read, we want 
> to avoid repeating allocating/freeing a btrfs_bio just for reading one 
> sector.
> (The existing behavior is using on-stack bio with bio_init/bio_uninit 
> inside scrub_recheck_block())

You can do that right now by declaring a btrfs_bio on-stack and then
calling bio_init on the embedded bio followed by a btrfs_bio_init on
the btrfs_bio.  But I don't think doing this will actually be a win
for the scrub code in terms of speed or code size.
Qu Wenruo March 7, 2023, 10:21 p.m. UTC | #6
On 2023/3/7 22:41, Christoph Hellwig wrote:
> On Tue, Mar 07, 2023 at 09:44:32AM +0800, Qu Wenruo wrote:
>> With my recent restart on scrub rework, this patch makes me wonder, what if
>> scrub wants to use btrfs_bio, but don't want to pass a valid btrfs_inode
>> pointer?
> 
> The full inode is only really needed for the data repair code.  But a lot
> of code uses the fs_info, which would have to be added as a separate
> counter.  The other usage is the sync_writers counter, which is a bit
> odd and should probably be keyed off the REQ_SYNC flag instead.
> 
>> E.g. scrub code just wants to read certain mirror of a logical bytenr.
>> This can simplify the handling of RAID56, as for data stripes the repair
>> path is the same, just try the next mirror(s).
>>
>> Furthermore most of the new btrfs_bio code is handling data reads by
>> triggering read-repair automatically.
>> This can be unnecessary for scrub.
> 
> This sounds like you don't want to use the btrfs_bio at all as you
> don't rely on any of the functionality from it.

Well, to me the proper mirror_num based read is the core of btrfs_bio, 
not the read-repair thing.

Thus I'm not that convinced fully automatic read-repair integrated into 
btrfs_bio is a good idea.

Thanks,
Qu
> 
>>
>> And since we're here, can we also have btrfs equivalent of on-stack bio?
>> As scrub can benefit a lot from that, as for sector-by-sector read, we want
>> to avoid repeating allocating/freeing a btrfs_bio just for reading one
>> sector.
>> (The existing behavior is using on-stack bio with bio_init/bio_uninit
>> inside scrub_recheck_block())
> 
> You can do that right now by declaring a btrfs_bio on-stack and then
> calling bio_init on the embedded bio followed by a btrfs_bio_init on
> the btrfs_bio.  But I don't think doing this will actually be a win
> for the scrub code in terms of speed or code size.
Qu Wenruo March 8, 2023, 6:04 a.m. UTC | #7
On 2023/3/8 06:21, Qu Wenruo wrote:
> 
> 
> On 2023/3/7 22:41, Christoph Hellwig wrote:
>> On Tue, Mar 07, 2023 at 09:44:32AM +0800, Qu Wenruo wrote:
>>> With my recent restart on scrub rework, this patch makes me wonder, 
>>> what if
>>> scrub wants to use btrfs_bio, but don't want to pass a valid btrfs_inode
>>> pointer?
>>
>> The full inode is only really needed for the data repair code.  But a lot
>> of code uses the fs_info, which would have to be added as a separate
>> counter.  The other usage is the sync_writers counter, which is a bit
>> odd and should probably be keyed off the REQ_SYNC flag instead.
>>
>>> E.g. scrub code just wants to read certain mirror of a logical bytenr.
>>> This can simplify the handling of RAID56, as for data stripes the repair
>>> path is the same, just try the next mirror(s).
>>>
>>> Furthermore most of the new btrfs_bio code is handling data reads by
>>> triggering read-repair automatically.
>>> This can be unnecessary for scrub.
>>
>> This sounds like you don't want to use the btrfs_bio at all as you
>> don't rely on any of the functionality from it.
> 
> Well, to me the proper mirror_num based read is the core of btrfs_bio, 
> not the read-repair thing.
> 
> Thus I'm not that convinced fully automatic read-repair integrated into 
> btrfs_bio is a good idea.

BTW, I also checked if I can craft a scrub specific version of 
btrfs_submit_bio().

The result doesn't look good at all.

Without a btrfs_bio structure, it's already pretty hard to properly put 
bioc, decrease the bio counter.

Or I need to create a scrub_bio, and re-implement all the needed endio 
function handling.

So please really consider the simplest case, one just wants to 
read/write some data using logical + mirror_num, without any btrfs inode 
nor csum verification.

Thanks,
Qu

> 
> Thanks,
> Qu
>>
>>>
>>> And since we're here, can we also have btrfs equivalent of on-stack bio?
>>> As scrub can benefit a lot from that, as for sector-by-sector read, 
>>> we want
>>> to avoid repeating allocating/freeing a btrfs_bio just for reading one
>>> sector.
>>> (The existing behavior is using on-stack bio with bio_init/bio_uninit
>>> inside scrub_recheck_block())
>>
>> You can do that right now by declaring a btrfs_bio on-stack and then
>> calling bio_init on the embedded bio followed by a btrfs_bio_init on
>> the btrfs_bio.  But I don't think doing this will actually be a win
>> for the scrub code in terms of speed or code size.
Christoph Hellwig March 8, 2023, 2:28 p.m. UTC | #8
On Wed, Mar 08, 2023 at 02:04:26PM +0800, Qu Wenruo wrote:
> BTW, I also checked if I can craft a scrub specific version of 
> btrfs_submit_bio().
>
> The result doesn't look good at all.
>
> Without a btrfs_bio structure, it's already pretty hard to properly put 
> bioc, decrease the bio counter.
>
> Or I need to create a scrub_bio, and re-implement all the needed endio 
> function handling.
>
> So please really consider the simplest case, one just wants to read/write 
> some data using logical + mirror_num, without any btrfs inode nor csum 
> verification.

As said before with a little more work we could get away without the
inode.  But the above sounds a little strange to me.  Can you share
your current code?  Maybe I can come up with some better ideas.
diff mbox series

Patch

diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 8affc88b0e0a4b..2398bb263957b2 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -22,9 +22,11 @@  static struct bio_set btrfs_bioset;
  * is already initialized by the block layer.
  */
 static inline void btrfs_bio_init(struct btrfs_bio *bbio,
+				  struct btrfs_inode *inode,
 				  btrfs_bio_end_io_t end_io, void *private)
 {
 	memset(bbio, 0, offsetof(struct btrfs_bio, bio));
+	bbio->inode = inode;
 	bbio->end_io = end_io;
 	bbio->private = private;
 }
@@ -37,16 +39,18 @@  static inline void btrfs_bio_init(struct btrfs_bio *bbio,
  * a mempool.
  */
 struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
+			    struct btrfs_inode *inode,
 			    btrfs_bio_end_io_t end_io, void *private)
 {
 	struct bio *bio;
 
 	bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS, &btrfs_bioset);
-	btrfs_bio_init(btrfs_bio(bio), end_io, private);
+	btrfs_bio_init(btrfs_bio(bio), inode, end_io, private);
 	return bio;
 }
 
 struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
+				    struct btrfs_inode *inode,
 				    btrfs_bio_end_io_t end_io, void *private)
 {
 	struct bio *bio;
@@ -56,7 +60,7 @@  struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
 
 	bio = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOFS, &btrfs_bioset);
 	bbio = btrfs_bio(bio);
-	btrfs_bio_init(bbio, end_io, private);
+	btrfs_bio_init(bbio, inode, end_io, private);
 
 	bio_trim(bio, offset >> 9, size >> 9);
 	bbio->iter = bio->bi_iter;
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index baaa27961cc812..8d69d0b226d99b 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -41,7 +41,8 @@  struct btrfs_bio {
 	unsigned int is_metadata:1;
 	struct bvec_iter iter;
 
-	/* File offset that this I/O operates on. */
+	/* Inode and offset into it that this I/O operates on. */
+	struct btrfs_inode *inode;
 	u64 file_offset;
 
 	/* @device is for stripe IO submission. */
@@ -80,8 +81,10 @@  int __init btrfs_bioset_init(void);
 void __cold btrfs_bioset_exit(void);
 
 struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
+			    struct btrfs_inode *inode,
 			    btrfs_bio_end_io_t end_io, void *private);
 struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
+				    struct btrfs_inode *inode,
 				    btrfs_bio_end_io_t end_io, void *private);
 
 
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 4a5aeb8dd4793a..b8e3e899974b34 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -344,7 +344,8 @@  static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_byte
 	struct bio *bio;
 	int ret;
 
-	bio = btrfs_bio_alloc(BIO_MAX_VECS, opf, endio_func, cb);
+	bio = btrfs_bio_alloc(BIO_MAX_VECS, opf, BTRFS_I(cb->inode), endio_func,
+			      cb);
 	bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
 
 	em = btrfs_get_chunk_map(fs_info, disk_bytenr, fs_info->sectorsize);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 9bd32daa9b9a6f..faf9312a46c0e1 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -740,7 +740,8 @@  int btrfs_repair_one_sector(struct btrfs_inode *inode, struct btrfs_bio *failed_
 		return -EIO;
 	}
 
-	repair_bio = btrfs_bio_alloc(1, REQ_OP_READ, failed_bbio->end_io,
+	repair_bio = btrfs_bio_alloc(1, REQ_OP_READ, failed_bbio->inode,
+				     failed_bbio->end_io,
 				     failed_bbio->private);
 	repair_bbio = btrfs_bio(repair_bio);
 	repair_bbio->file_offset = start;
@@ -1394,9 +1395,8 @@  static int alloc_new_bio(struct btrfs_inode *inode,
 	struct bio *bio;
 	int ret;
 
-	ASSERT(bio_ctrl->end_io_func);
-
-	bio = btrfs_bio_alloc(BIO_MAX_VECS, opf, bio_ctrl->end_io_func, NULL);
+	bio = btrfs_bio_alloc(BIO_MAX_VECS, opf, inode, bio_ctrl->end_io_func,
+			      NULL);
 	/*
 	 * For compressed page range, its disk_bytenr is always @disk_bytenr
 	 * passed in, no matter if we have added any range into previous bio.
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3c49742f0d4556..0a85e42f114cc5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8097,7 +8097,8 @@  static void btrfs_submit_direct(const struct iomap_iter *iter,
 		 * the allocation is backed by btrfs_bioset.
 		 */
 		bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len,
-					      btrfs_end_dio_bio, dip);
+					      BTRFS_I(inode), btrfs_end_dio_bio,
+					      dip);
 		btrfs_bio(bio)->file_offset = file_offset;
 
 		if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
@@ -10409,6 +10410,7 @@  int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
 
 			if (!bio) {
 				bio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ,
+						      inode,
 						      btrfs_encoded_read_endio,
 						      &priv);
 				bio->bi_iter.bi_sector =