diff mbox series

[11/15] btrfs: put direct I/O checksums in btrfs_dio_private instead of bio

Message ID 95b275ed47f1e4bdaba53040fe6de9eefdf3a5fd.1583789410.git.osandov@fb.com (mailing list archive)
State New, archived
Headers show
Series btrfs: read repair/direct I/O improvements | expand

Commit Message

Omar Sandoval March 9, 2020, 9:32 p.m. UTC
From: Omar Sandoval <osandov@fb.com>

The next commit will get rid of btrfs_dio_private->orig_bio. The only
thing we really need it for is containing all of the checksums, but we
can easily put those in btrfs_dio_private and get rid of the awkward
logic that looks up the checksums for orig_bio when the first split bio
is submitted. (Interestingly, btrfs_dio_private did contain the
checksums before commit 23ea8e5a0767 ("Btrfs: load checksum data once
when submitting a direct read io"), but it didn't look them up up
front.)

Signed-off-by: Omar Sandoval <osandov@fb.com>
---
 fs/btrfs/inode.c | 79 ++++++++++++++++++++++++------------------------
 1 file changed, 39 insertions(+), 40 deletions(-)

Comments

Josef Bacik March 11, 2020, 6:04 p.m. UTC | #1
On 3/9/20 5:32 PM, Omar Sandoval wrote:
> From: Omar Sandoval <osandov@fb.com>
> 
> The next commit will get rid of btrfs_dio_private->orig_bio. The only
> thing we really need it for is containing all of the checksums, but we
> can easily put those in btrfs_dio_private and get rid of the awkward
> logic that looks up the checksums for orig_bio when the first split bio
> is submitted. (Interestingly, btrfs_dio_private did contain the
> checksums before commit 23ea8e5a0767 ("Btrfs: load checksum data once
> when submitting a direct read io"), but it didn't look them up up
> front.)
> 
> Signed-off-by: Omar Sandoval <osandov@fb.com>

Reviewed-by: Josef Bacik <josef@toxicpanda.com>

Thanks,

Josef
Nikolay Borisov March 17, 2020, 4:37 p.m. UTC | #2
On 9.03.20 г. 23:32 ч., Omar Sandoval wrote:
> From: Omar Sandoval <osandov@fb.com>
> 
> The next commit will get rid of btrfs_dio_private->orig_bio. The only
> thing we really need it for is containing all of the checksums, but we
> can easily put those in btrfs_dio_private and get rid of the awkward
> logic that looks up the checksums for orig_bio when the first split bio
> is submitted. (Interestingly, btrfs_dio_private did contain the
> checksums before commit 23ea8e5a0767 ("Btrfs: load checksum data once
> when submitting a direct read io"), but it didn't look them up up
> front.)

nit: It would be useful to surmise the structural changes this patch
does. Essentially this makes each individual cloned bio to index its
checksums into the global checksum storage array anchored at
btrfs_dio_private::sums.
> 
> Signed-off-by: Omar Sandoval <osandov@fb.com>

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
David Sterba April 3, 2020, 4:18 p.m. UTC | #3
On Mon, Mar 09, 2020 at 02:32:37PM -0700, Omar Sandoval wrote:
> -	file_offset -= dip->logical_offset;
> -	file_offset >>= inode->i_sb->s_blocksize_bits;
> -	csum_size = btrfs_super_csum_size(btrfs_sb(inode->i_sb)->super_copy);
> -	io_bio->csum = orig_io_bio->csum + csum_size * file_offset;

> -		ret = btrfs_lookup_and_bind_dio_csum(inode, dip, bio,
> -						     file_offset);
> -		if (ret)
> -			goto err;
> +		u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
> +		size_t csum_offset;
> +
> +		csum_offset = ((file_offset - dip->logical_offset) >>
> +			       inode->i_sb->s_blocksize_bits) * csum_size;

Please split the expression like it used to be in the original code
(above).

> +		btrfs_io_bio(bio)->csum = dip->sums + csum_offset;
>  	}
>  map:
>  	ret = btrfs_map_bio(fs_info, bio, 0);
> @@ -8047,13 +8018,25 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
>  				loff_t file_offset)
>  {
>  	struct btrfs_dio_private *dip = NULL;
> +	size_t dip_size;
>  	struct bio *bio = NULL;
>  	struct btrfs_io_bio *io_bio;
>  	bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
> +	const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
>  
>  	bio = btrfs_bio_clone(dio_bio);
>  
> -	dip = kzalloc(sizeof(*dip), GFP_NOFS);
> +	dip_size = sizeof(*dip);
> +	if (!write && csum) {
> +		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> +		u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
> +		size_t nblocks = (dio_bio->bi_iter.bi_size >>
> +				  inode->i_sb->s_blocksize_bits);

		nblocks = dio_bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;

This overflows 80 chars/line but is IMHO more readable than the split
line.
diff mbox series

Patch

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a7fb0ba8cde4..4a2e44f3e66e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -84,6 +84,9 @@  struct btrfs_dio_private {
 	 */
 	blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *,
 			blk_status_t);
+
+	/* Checksums. */
+	u8 sums[];
 };
 
 struct btrfs_dio_data {
@@ -7753,7 +7756,6 @@  static void btrfs_endio_direct_read(struct bio *bio)
 
 	dio_bio->bi_status = err;
 	dio_end_io(dio_bio);
-	btrfs_io_bio_free_csum(io_bio);
 	bio_put(bio);
 }
 
@@ -7865,39 +7867,6 @@  static void btrfs_end_dio_bio(struct bio *bio)
 	bio_put(bio);
 }
 
-static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
-						 struct btrfs_dio_private *dip,
-						 struct bio *bio,
-						 u64 file_offset)
-{
-	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
-	struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
-	u16 csum_size;
-	blk_status_t ret;
-
-	/*
-	 * We load all the csum data we need when we submit
-	 * the first bio to reduce the csum tree search and
-	 * contention.
-	 */
-	if (dip->logical_offset == file_offset) {
-		ret = btrfs_lookup_bio_sums(inode, dip->orig_bio, file_offset,
-					    NULL);
-		if (ret)
-			return ret;
-	}
-
-	if (bio == dip->orig_bio)
-		return 0;
-
-	file_offset -= dip->logical_offset;
-	file_offset >>= inode->i_sb->s_blocksize_bits;
-	csum_size = btrfs_super_csum_size(btrfs_sb(inode->i_sb)->super_copy);
-	io_bio->csum = orig_io_bio->csum + csum_size * file_offset;
-
-	return 0;
-}
-
 static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 		struct inode *inode, u64 file_offset, int async_submit)
 {
@@ -7933,10 +7902,12 @@  static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 		if (ret)
 			goto err;
 	} else {
-		ret = btrfs_lookup_and_bind_dio_csum(inode, dip, bio,
-						     file_offset);
-		if (ret)
-			goto err;
+		u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+		size_t csum_offset;
+
+		csum_offset = ((file_offset - dip->logical_offset) >>
+			       inode->i_sb->s_blocksize_bits) * csum_size;
+		btrfs_io_bio(bio)->csum = dip->sums + csum_offset;
 	}
 map:
 	ret = btrfs_map_bio(fs_info, bio, 0);
@@ -8047,13 +8018,25 @@  static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
 				loff_t file_offset)
 {
 	struct btrfs_dio_private *dip = NULL;
+	size_t dip_size;
 	struct bio *bio = NULL;
 	struct btrfs_io_bio *io_bio;
 	bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
+	const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
 
 	bio = btrfs_bio_clone(dio_bio);
 
-	dip = kzalloc(sizeof(*dip), GFP_NOFS);
+	dip_size = sizeof(*dip);
+	if (!write && csum) {
+		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+		u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+		size_t nblocks = (dio_bio->bi_iter.bi_size >>
+				  inode->i_sb->s_blocksize_bits);
+
+		dip_size += csum_size * nblocks;
+	}
+
+	dip = kzalloc(dip_size, GFP_NOFS);
 	if (!dip) {
 		if (!write) {
 			unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
@@ -8093,11 +8076,27 @@  static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
 			dip->bytes;
 		dio_data->unsubmitted_oe_range_start =
 			dio_data->unsubmitted_oe_range_end;
-
 		bio->bi_end_io = btrfs_endio_direct_write;
 	} else {
 		bio->bi_end_io = btrfs_endio_direct_read;
 		dip->subio_endio = btrfs_subio_endio_read;
+
+		if (csum) {
+			blk_status_t status;
+
+			/*
+			 * Load the csums up front to reduce csum tree searches
+			 * and contention when submitting bios.
+			 */
+			status = btrfs_lookup_bio_sums(inode, dio_bio,
+						       file_offset, dip->sums);
+			if (status != BLK_STS_OK) {
+				dip->errors = 1;
+				if (refcount_dec_and_test(&dip->refs))
+					bio_io_error(dip->orig_bio);
+				return;
+			}
+		}
 	}
 
 	btrfs_submit_direct_hook(dip);