diff mbox series

[15/21] btrfs: remove the extent_buffer lookup in btree block checksumming

Message ID 20230503152441.1141019-16-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series [01/21] btrfs: mark extent_buffer_under_io static | expand

Commit Message

Christoph Hellwig May 3, 2023, 3:24 p.m. UTC
The checksumming of btree blocks always operates on the entire
extent_buffer, and because btree blocks are always allocated contiguously
on disk they are never split by btrfs_submit_bio.

Simplify the checksumming code by finding the extent_buffer in the
btrfs_bio private data instead of trying to search through the bio_vec.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/disk-io.c | 121 ++++++++++-----------------------------------
 1 file changed, 25 insertions(+), 96 deletions(-)

Comments

Qu Wenruo May 26, 2023, 6:39 a.m. UTC | #1
On 2023/5/3 23:24, Christoph Hellwig wrote:
> The checksumming of btree blocks always operates on the entire
> extent_buffer, and because btree blocks are always allocated contiguously
> on disk they are never split by btrfs_submit_bio.
>
> Simplify the checksumming code by finding the extent_buffer in the
> btrfs_bio private data instead of trying to search through the bio_vec.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> Reviewed-by: Qu Wenruo <wqu@suse.com>
> ---
>   fs/btrfs/disk-io.c | 121 ++++++++++-----------------------------------
>   1 file changed, 25 insertions(+), 96 deletions(-)
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index a32469e0c1e4f4..d5c204bbb9786c 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -312,12 +312,35 @@ int btrfs_read_extent_buffer(struct extent_buffer *eb,
>   	return ret;
>   }
>
> -static int csum_one_extent_buffer(struct extent_buffer *eb)
> +/*
> + * Checksum a dirty tree block before IO.
> + */
> +blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
>   {
> +	struct extent_buffer *eb = bbio->private;
>   	struct btrfs_fs_info *fs_info = eb->fs_info;
> +	u64 found_start = btrfs_header_bytenr(eb);
>   	u8 result[BTRFS_CSUM_SIZE];
>   	int ret;
>
> +	/*
> +	 * Btree blocks are always contiguous on disk.
> +	 */
> +	if (WARN_ON_ONCE(bbio->file_offset != eb->start))
> +		return BLK_STS_IOERR;
> +	if (WARN_ON_ONCE(bbio->bio.bi_iter.bi_size != eb->len))
> +		return BLK_STS_IOERR;
> +
> +	if (test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags)) {
> +		WARN_ON_ONCE(found_start != 0);
> +		return BLK_STS_OK;
> +	}
> +
> +	if (WARN_ON_ONCE(found_start != eb->start))
> +		return BLK_STS_IOERR;
> +	if (WARN_ON_ONCE(!PageUptodate(eb->pages[0])))
> +		return BLK_STS_IOERR;

Unfortunately this is screwing up subpage support.

Previously subpage goes its own helper, which replaces the uptodate
check to using the subpage helper.

Since PageUptodate flag is only set when all eb in that page is uptodate.
If we have a hole in the page range, the page will never be marked Uptodate.

Thus for any subpage mount, this would lead to transaction abort during
metadata writeback.

We have btrfs_page_clamp_test_uptodate() for this usage.

Thanks,
Qu

> +
>   	ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
>   				    offsetof(struct btrfs_header, fsid),
>   				    BTRFS_FSID_SIZE) == 0);
> @@ -344,8 +367,7 @@ static int csum_one_extent_buffer(struct extent_buffer *eb)
>   		goto error;
>   	}
>   	write_extent_buffer(eb, result, 0, fs_info->csum_size);
> -
> -	return 0;
> +	return BLK_STS_OK;
>
>   error:
>   	btrfs_print_tree(eb, 0);
> @@ -359,99 +381,6 @@ static int csum_one_extent_buffer(struct extent_buffer *eb)
>   	 */
>   	WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) ||
>   		btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID);
> -	return ret;
> -}
> -
> -/* Checksum all dirty extent buffers in one bio_vec */
> -static int csum_dirty_subpage_buffers(struct btrfs_fs_info *fs_info,
> -				      struct bio_vec *bvec)
> -{
> -	struct page *page = bvec->bv_page;
> -	u64 bvec_start = page_offset(page) + bvec->bv_offset;
> -	u64 cur;
> -	int ret = 0;
> -
> -	for (cur = bvec_start; cur < bvec_start + bvec->bv_len;
> -	     cur += fs_info->nodesize) {
> -		struct extent_buffer *eb;
> -		bool uptodate;
> -
> -		eb = find_extent_buffer(fs_info, cur);
> -		uptodate = btrfs_subpage_test_uptodate(fs_info, page, cur,
> -						       fs_info->nodesize);
> -
> -		/* A dirty eb shouldn't disappear from buffer_radix */
> -		if (WARN_ON(!eb))
> -			return -EUCLEAN;
> -
> -		if (WARN_ON(cur != btrfs_header_bytenr(eb))) {
> -			free_extent_buffer(eb);
> -			return -EUCLEAN;
> -		}
> -		if (WARN_ON(!uptodate)) {
> -			free_extent_buffer(eb);
> -			return -EUCLEAN;
> -		}
> -
> -		ret = csum_one_extent_buffer(eb);
> -		free_extent_buffer(eb);
> -		if (ret < 0)
> -			return ret;
> -	}
> -	return ret;
> -}
> -
> -/*
> - * Checksum a dirty tree block before IO.  This has extra checks to make sure
> - * we only fill in the checksum field in the first page of a multi-page block.
> - * For subpage extent buffers we need bvec to also read the offset in the page.
> - */
> -static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct bio_vec *bvec)
> -{
> -	struct page *page = bvec->bv_page;
> -	u64 start = page_offset(page);
> -	u64 found_start;
> -	struct extent_buffer *eb;
> -
> -	if (fs_info->nodesize < PAGE_SIZE)
> -		return csum_dirty_subpage_buffers(fs_info, bvec);
> -
> -	eb = (struct extent_buffer *)page->private;
> -	if (page != eb->pages[0])
> -		return 0;
> -
> -	found_start = btrfs_header_bytenr(eb);
> -
> -	if (test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags)) {
> -		WARN_ON(found_start != 0);
> -		return 0;
> -	}
> -
> -	/*
> -	 * Please do not consolidate these warnings into a single if.
> -	 * It is useful to know what went wrong.
> -	 */
> -	if (WARN_ON(found_start != start))
> -		return -EUCLEAN;
> -	if (WARN_ON(!PageUptodate(page)))
> -		return -EUCLEAN;
> -
> -	return csum_one_extent_buffer(eb);
> -}
> -
> -blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
> -{
> -	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
> -	struct bvec_iter iter;
> -	struct bio_vec bv;
> -	int ret = 0;
> -
> -	bio_for_each_segment(bv, &bbio->bio, iter) {
> -		ret = csum_dirty_buffer(fs_info, &bv);
> -		if (ret)
> -			break;
> -	}
> -
>   	return errno_to_blk_status(ret);
>   }
>
Christoph Hellwig May 26, 2023, 6:41 a.m. UTC | #2
On Fri, May 26, 2023 at 02:39:54PM +0800, Qu Wenruo wrote:
>> +	if (WARN_ON_ONCE(found_start != eb->start))
>> +		return BLK_STS_IOERR;
>> +	if (WARN_ON_ONCE(!PageUptodate(eb->pages[0])))
>> +		return BLK_STS_IOERR;
>
> Unfortunately this is screwing up subpage support.
>
> Previously subpage goes its own helper, which replaces the uptodate
> check to using the subpage helper.
>
> Since PageUptodate flag is only set when all eb in that page is uptodate.
> If we have a hole in the page range, the page will never be marked Uptodate.
>
> Thus for any subpage mount, this would lead to transaction abort during
> metadata writeback.
>
> We have btrfs_page_clamp_test_uptodate() for this usage.

True, this should use the sub page helper.  Or maybe just drop
this assert altogether?
Qu Wenruo May 26, 2023, 6:43 a.m. UTC | #3
On 2023/5/26 14:41, Christoph Hellwig wrote:
> On Fri, May 26, 2023 at 02:39:54PM +0800, Qu Wenruo wrote:
>>> +	if (WARN_ON_ONCE(found_start != eb->start))
>>> +		return BLK_STS_IOERR;
>>> +	if (WARN_ON_ONCE(!PageUptodate(eb->pages[0])))
>>> +		return BLK_STS_IOERR;
>>
>> Unfortunately this is screwing up subpage support.
>>
>> Previously subpage goes its own helper, which replaces the uptodate
>> check to using the subpage helper.
>>
>> Since PageUptodate flag is only set when all eb in that page is uptodate.
>> If we have a hole in the page range, the page will never be marked Uptodate.
>>
>> Thus for any subpage mount, this would lead to transaction abort during
>> metadata writeback.
>>
>> We have btrfs_page_clamp_test_uptodate() for this usage.
>
> True, this should use the sub page helper.  Or maybe just drop
> this assert altogether?

It may be better to keep it, as there is also another case related to
subpage fs got its PageUptodate and bitmaps de-synced.

Thus the assert can have a chance to catch such problem.

Thanks,
Qu
Christoph Hellwig May 26, 2023, 7:03 a.m. UTC | #4
On Fri, May 26, 2023 at 02:43:11PM +0800, Qu Wenruo wrote:
>>> Thus for any subpage mount, this would lead to transaction abort during
>>> metadata writeback.
>>>
>>> We have btrfs_page_clamp_test_uptodate() for this usage.
>>
>> True, this should use the sub page helper.  Or maybe just drop
>> this assert altogether?
>
> It may be better to keep it, as there is also another case related to
> subpage fs got its PageUptodate and bitmaps de-synced.
>
> Thus the assert can have a chance to catch such problem.

Ok.  I don't really think we need the clamp version here, though.
Does this work for you?

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c461a46ac6f207..36d6b8d4b2c1fa 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -269,7 +269,8 @@ blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
 
 	if (WARN_ON_ONCE(found_start != eb->start))
 		return BLK_STS_IOERR;
-	if (WARN_ON_ONCE(!PageUptodate(eb->pages[0])))
+	if (WARN_ON(!btrfs_page_test_uptodate(fs_info, eb->pages[0],
+					      eb->start, eb->len)))
 		return BLK_STS_IOERR;
 
 	ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
Qu Wenruo May 26, 2023, 7:25 a.m. UTC | #5
On 2023/5/26 15:03, Christoph Hellwig wrote:
> On Fri, May 26, 2023 at 02:43:11PM +0800, Qu Wenruo wrote:
>>>> Thus for any subpage mount, this would lead to transaction abort during
>>>> metadata writeback.
>>>>
>>>> We have btrfs_page_clamp_test_uptodate() for this usage.
>>>
>>> True, this should use the sub page helper.  Or maybe just drop
>>> this assert altogether?
>>
>> It may be better to keep it, as there is also another case related to
>> subpage fs got its PageUptodate and bitmaps de-synced.
>>
>> Thus the assert can have a chance to catch such problem.
>
> Ok.  I don't really think we need the clamp version here, though.
> Does this work for you?

Yep, it works.

And you're right, no need for clamp. For subpage the range would be
inside the page anyway, and for regular cases, it's just PageUptodate().

Thanks,
Qu
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index c461a46ac6f207..36d6b8d4b2c1fa 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -269,7 +269,8 @@ blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
>
>   	if (WARN_ON_ONCE(found_start != eb->start))
>   		return BLK_STS_IOERR;
> -	if (WARN_ON_ONCE(!PageUptodate(eb->pages[0])))
> +	if (WARN_ON(!btrfs_page_test_uptodate(fs_info, eb->pages[0],
> +					      eb->start, eb->len)))
>   		return BLK_STS_IOERR;
>
>   	ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
David Sterba May 26, 2023, 1:58 p.m. UTC | #6
On Fri, May 26, 2023 at 03:25:25PM +0800, Qu Wenruo wrote:
> On 2023/5/26 15:03, Christoph Hellwig wrote:
> > On Fri, May 26, 2023 at 02:43:11PM +0800, Qu Wenruo wrote:
> >>>> Thus for any subpage mount, this would lead to transaction abort during
> >>>> metadata writeback.
> >>>>
> >>>> We have btrfs_page_clamp_test_uptodate() for this usage.
> >>>
> >>> True, this should use the sub page helper.  Or maybe just drop
> >>> this assert altogether?
> >>
> >> It may be better to keep it, as there is also another case related to
> >> subpage fs got its PageUptodate and bitmaps de-synced.
> >>
> >> Thus the assert can have a chance to catch such problem.
> >
> > Ok.  I don't really think we need the clamp version here, though.
> > Does this work for you?
> 
> Yep, it works.
> 
> And you're right, no need for clamp. For subpage the range would be
> inside the page anyway, and for regular cases, it's just PageUptodate().

Folded to "btrfs: remove the extent_buffer lookup in btree block
checksumming", thanks.
diff mbox series

Patch

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a32469e0c1e4f4..d5c204bbb9786c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -312,12 +312,35 @@  int btrfs_read_extent_buffer(struct extent_buffer *eb,
 	return ret;
 }
 
-static int csum_one_extent_buffer(struct extent_buffer *eb)
+/*
+ * Checksum a dirty tree block before IO.
+ */
+blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
 {
+	struct extent_buffer *eb = bbio->private;
 	struct btrfs_fs_info *fs_info = eb->fs_info;
+	u64 found_start = btrfs_header_bytenr(eb);
 	u8 result[BTRFS_CSUM_SIZE];
 	int ret;
 
+	/*
+	 * Btree blocks are always contiguous on disk.
+	 */
+	if (WARN_ON_ONCE(bbio->file_offset != eb->start))
+		return BLK_STS_IOERR;
+	if (WARN_ON_ONCE(bbio->bio.bi_iter.bi_size != eb->len))
+		return BLK_STS_IOERR;
+
+	if (test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags)) {
+		WARN_ON_ONCE(found_start != 0);
+		return BLK_STS_OK;
+	}
+
+	if (WARN_ON_ONCE(found_start != eb->start))
+		return BLK_STS_IOERR;
+	if (WARN_ON_ONCE(!PageUptodate(eb->pages[0])))
+		return BLK_STS_IOERR;
+
 	ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
 				    offsetof(struct btrfs_header, fsid),
 				    BTRFS_FSID_SIZE) == 0);
@@ -344,8 +367,7 @@  static int csum_one_extent_buffer(struct extent_buffer *eb)
 		goto error;
 	}
 	write_extent_buffer(eb, result, 0, fs_info->csum_size);
-
-	return 0;
+	return BLK_STS_OK;
 
 error:
 	btrfs_print_tree(eb, 0);
@@ -359,99 +381,6 @@  static int csum_one_extent_buffer(struct extent_buffer *eb)
 	 */
 	WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) ||
 		btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID);
-	return ret;
-}
-
-/* Checksum all dirty extent buffers in one bio_vec */
-static int csum_dirty_subpage_buffers(struct btrfs_fs_info *fs_info,
-				      struct bio_vec *bvec)
-{
-	struct page *page = bvec->bv_page;
-	u64 bvec_start = page_offset(page) + bvec->bv_offset;
-	u64 cur;
-	int ret = 0;
-
-	for (cur = bvec_start; cur < bvec_start + bvec->bv_len;
-	     cur += fs_info->nodesize) {
-		struct extent_buffer *eb;
-		bool uptodate;
-
-		eb = find_extent_buffer(fs_info, cur);
-		uptodate = btrfs_subpage_test_uptodate(fs_info, page, cur,
-						       fs_info->nodesize);
-
-		/* A dirty eb shouldn't disappear from buffer_radix */
-		if (WARN_ON(!eb))
-			return -EUCLEAN;
-
-		if (WARN_ON(cur != btrfs_header_bytenr(eb))) {
-			free_extent_buffer(eb);
-			return -EUCLEAN;
-		}
-		if (WARN_ON(!uptodate)) {
-			free_extent_buffer(eb);
-			return -EUCLEAN;
-		}
-
-		ret = csum_one_extent_buffer(eb);
-		free_extent_buffer(eb);
-		if (ret < 0)
-			return ret;
-	}
-	return ret;
-}
-
-/*
- * Checksum a dirty tree block before IO.  This has extra checks to make sure
- * we only fill in the checksum field in the first page of a multi-page block.
- * For subpage extent buffers we need bvec to also read the offset in the page.
- */
-static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct bio_vec *bvec)
-{
-	struct page *page = bvec->bv_page;
-	u64 start = page_offset(page);
-	u64 found_start;
-	struct extent_buffer *eb;
-
-	if (fs_info->nodesize < PAGE_SIZE)
-		return csum_dirty_subpage_buffers(fs_info, bvec);
-
-	eb = (struct extent_buffer *)page->private;
-	if (page != eb->pages[0])
-		return 0;
-
-	found_start = btrfs_header_bytenr(eb);
-
-	if (test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags)) {
-		WARN_ON(found_start != 0);
-		return 0;
-	}
-
-	/*
-	 * Please do not consolidate these warnings into a single if.
-	 * It is useful to know what went wrong.
-	 */
-	if (WARN_ON(found_start != start))
-		return -EUCLEAN;
-	if (WARN_ON(!PageUptodate(page)))
-		return -EUCLEAN;
-
-	return csum_one_extent_buffer(eb);
-}
-
-blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
-{
-	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
-	struct bvec_iter iter;
-	struct bio_vec bv;
-	int ret = 0;
-
-	bio_for_each_segment(bv, &bbio->bio, iter) {
-		ret = csum_dirty_buffer(fs_info, &bv);
-		if (ret)
-			break;
-	}
-
 	return errno_to_blk_status(ret);
 }