[7/9] btrfs: fix a race in clearing the writeback bit for sub-page I/O

Message ID 20230724132701.816771-8-hch@lst.de
State New, archived
Series [1/9] btrfs: don't stop integrity writeback too early

Commit Message

Christoph Hellwig July 24, 2023, 1:26 p.m. UTC
For sub-page I/O, a fast I/O completion can clear the page writeback bit
in the I/O completion handler before subsequent bios are submitted.
Use a trick from iomap and defer submission of bios until we've reached
the end of the page to avoid this race.

Fixes: c5ef5c6c733a ("btrfs: make __extent_writepage_io() only submit dirty range for subpage")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/btrfs/extent_io.c | 54 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 42 insertions(+), 12 deletions(-)
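
Condensed, the idea is the same deferral iomap uses: while walking the
sub-page ranges of one page, write bios are only queued on a local list,
and the list is flushed after the whole page has been processed. Below is
a minimal userspace model of that ordering; every name in it (fake_bio,
ranges_under_writeback, and so on) is made up for illustration and is not
a btrfs or block-layer API.

	/*
	 * Toy userspace model of the deferral (made-up names, not kernel
	 * code): one "bio" per dirty sub-page range is built and queued,
	 * every range is accounted as under writeback, and only then are
	 * the queued bios "submitted" (here: completed immediately, the
	 * worst case for the race).
	 */
	#include <stdio.h>
	#include <stdlib.h>

	struct fake_bio {
		int range;		/* sub-page range this bio covers */
		struct fake_bio *next;
	};

	static int ranges_under_writeback; /* stand-in for the subpage bitmap */

	static void fake_end_bio(struct fake_bio *bio)
	{
		/* page-level writeback ends when the last range completes */
		if (--ranges_under_writeback == 0)
			printf("page writeback cleared by bio for range %d\n",
			       bio->range);
		free(bio);
	}

	int main(void)
	{
		struct fake_bio *pending = NULL, *bio;

		/* build and queue one bio per dirty range; don't submit yet */
		for (int range = 0; range < 2; range++) {
			bio = malloc(sizeof(*bio));
			bio->range = range;
			bio->next = pending;
			pending = bio;
			ranges_under_writeback++; /* range now under writeback */
		}

		/* flush: even an instant completion cannot hit zero early now */
		while ((bio = pending)) {
			pending = bio->next;
			fake_end_bio(bio);
		}
		return 0;
	}

With submission moved after the loop, the count in the model (and the
sub-page writeback state in the real code) can only drop to zero once
every range has been accounted for, no matter how quickly individual
completions arrive.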

Comments

Boris Burkov Aug. 3, 2023, 12:59 a.m. UTC | #1
On Mon, Jul 24, 2023 at 06:26:59AM -0700, Christoph Hellwig wrote:
> For sub-page I/O, a fast I/O completion can clear the page writeback bit
> in the I/O completion handler before subsequent bios are submitted.
> Use a trick from iomap and defer submission of bios until we've reached
> the end of the page to avoid this race.

This LGTM in that I don't see a bug.

However, I'm confused about why exactly it's necessary: doesn't the
existing logic already allocate only one bio and call bio_add_page on
it in a loop? And bio_add_page handles the subpage blocksize case.

As far as I can tell, it tries to do it sequentially, so it should be
contiguous. Are you worried about non-contiguous writes within one page
resulting in separate bios? In that case, why would a completion clear
the writeback bit on the page if the whole page isn't written back? I
guess that could be tricky to do and this is the correct solution?

Thanks,
Boris
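
For context on the questions above: separate bios within one page can
indeed happen, since submit_extent_page in this very patch ends a bio
mid-page when the next range is not contiguous, the bio is full, or an
ordered extent boundary is hit (all three call sites now take the
bio_list). As for the writeback bit, the race described in the commit
message is that page-level writeback is ended once no sub-page range is
still tracked as under writeback, which a fast completion of the first
bio can trigger before the later ranges have even been marked. A toy
userspace model of that broken ordering, again with made-up names rather
than btrfs APIs:

	/*
	 * Toy model of the race (made-up names, not btrfs code): the bio
	 * for the first sub-page range completes before the second range
	 * is marked as under writeback, so the page-level writeback state
	 * is cleared while half of the page is still not written.
	 */
	#include <stdio.h>

	static int ranges_under_writeback; /* stand-in for the subpage bitmap */

	static void fake_end_bio(int range)
	{
		if (--ranges_under_writeback == 0)
			printf("page writeback cleared after range %d\n", range);
	}

	int main(void)
	{
		/* range 0: marked as under writeback, bio submitted right away */
		ranges_under_writeback++;
		fake_end_bio(0);	/* fast device: completion wins the race */

		/*
		 * range 1: marked and submitted only now -- too late, the
		 * page writeback bit was already cleared above.
		 */
		ranges_under_writeback++;
		fake_end_bio(1);
		return 0;
	}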

> 
> Fixes: c5ef5c6c733a ("btrfs: make __extent_writepage_io() only submit dirty range for subpage")
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Josef Bacik <josef@toxicpanda.com>
> ---
>  fs/btrfs/extent_io.c | 54 ++++++++++++++++++++++++++++++++++----------
>  1 file changed, 42 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index fada7a1931b130..48a49c57daa6fd 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -105,7 +105,8 @@ struct btrfs_bio_ctrl {
>  	struct writeback_control *wbc;
>  };
>  
> -static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
> +static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl,
> +			   struct bio_list *bio_list)
>  {
>  	struct btrfs_bio *bbio = bio_ctrl->bbio;
>  
> @@ -118,6 +119,8 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
>  	if (btrfs_op(&bbio->bio) == BTRFS_MAP_READ &&
>  	    bio_ctrl->compress_type != BTRFS_COMPRESS_NONE)
>  		btrfs_submit_compressed_read(bbio);
> +	else if (bio_list)
> +		bio_list_add(bio_list, &bbio->bio);
>  	else
>  		btrfs_submit_bio(bbio, 0);
>  
> @@ -141,7 +144,22 @@ static void submit_write_bio(struct btrfs_bio_ctrl *bio_ctrl, int ret)
>  		/* The bio is owned by the end_io handler now */
>  		bio_ctrl->bbio = NULL;
>  	} else {
> -		submit_one_bio(bio_ctrl);
> +		submit_one_bio(bio_ctrl, NULL);
> +	}
> +}
> +
> +static void btrfs_submit_pending_bios(struct bio_list *bio_list, int ret)
> +{
> +	struct bio *bio;
> +
> +	if (ret) {
> +		blk_status_t status = errno_to_blk_status(ret);
> +
> +		while ((bio = bio_list_pop(bio_list)))
> +			btrfs_bio_end_io(btrfs_bio(bio), status);
> +	} else {
> +		while ((bio = bio_list_pop(bio_list)))
> +			btrfs_submit_bio(btrfs_bio(bio), 0);
>  	}
>  }
>  
> @@ -791,7 +809,8 @@ static void alloc_new_bio(struct btrfs_inode *inode,
>   */
>  static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
>  			       u64 disk_bytenr, struct page *page,
> -			       size_t size, unsigned long pg_offset)
> +			       size_t size, unsigned long pg_offset,
> +			       struct bio_list *bio_list)
>  {
>  	struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
>  
> @@ -800,7 +819,7 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
>  
>  	if (bio_ctrl->bbio &&
>  	    !btrfs_bio_is_contig(bio_ctrl, page, disk_bytenr, pg_offset))
> -		submit_one_bio(bio_ctrl);
> +		submit_one_bio(bio_ctrl, bio_list);
>  
>  	do {
>  		u32 len = size;
> @@ -820,7 +839,7 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
>  
>  		if (bio_add_page(&bio_ctrl->bbio->bio, page, len, pg_offset) != len) {
>  			/* bio full: move on to a new one */
> -			submit_one_bio(bio_ctrl);
> +			submit_one_bio(bio_ctrl, bio_list);
>  			continue;
>  		}
>  
> @@ -834,7 +853,7 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
>  
>  		/* Ordered extent boundary: move on to a new bio. */
>  		if (bio_ctrl->len_to_oe_boundary == 0)
> -			submit_one_bio(bio_ctrl);
> +			submit_one_bio(bio_ctrl, bio_list);
>  	} while (size);
>  }
>  
> @@ -1082,14 +1101,14 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
>  		}
>  
>  		if (bio_ctrl->compress_type != compress_type) {
> -			submit_one_bio(bio_ctrl);
> +			submit_one_bio(bio_ctrl, NULL);
>  			bio_ctrl->compress_type = compress_type;
>  		}
>  
>  		if (force_bio_submit)
> -			submit_one_bio(bio_ctrl);
> +			submit_one_bio(bio_ctrl, NULL);
>  		submit_extent_page(bio_ctrl, disk_bytenr, page, iosize,
> -				   pg_offset);
> +				   pg_offset, NULL);
>  		cur = cur + iosize;
>  		pg_offset += iosize;
>  	}
> @@ -1113,7 +1132,7 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
>  	 * If btrfs_do_readpage() failed we will want to submit the assembled
>  	 * bio to do the cleanup.
>  	 */
> -	submit_one_bio(&bio_ctrl);
> +	submit_one_bio(&bio_ctrl, NULL);
>  	return ret;
>  }
>  
> @@ -1287,6 +1306,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
>  	u64 extent_offset;
>  	u64 block_start;
>  	struct extent_map *em;
> +	struct bio_list bio_list = BIO_EMPTY_LIST;
>  	int ret = 0;
>  	int nr = 0;
>  
> @@ -1365,7 +1385,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
>  		btrfs_page_clear_dirty(fs_info, page, cur, iosize);
>  
>  		submit_extent_page(bio_ctrl, disk_bytenr, page, iosize,
> -				   cur - page_offset(page));
> +				   cur - page_offset(page), &bio_list);
>  		cur += iosize;
>  		nr++;
>  	}
> @@ -1378,6 +1398,16 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
>  		set_page_writeback(page);
>  		end_page_writeback(page);
>  	}
> +
> +	/*
> +	 * Submit all bios we queued up for this page.
> +	 *
> +	 * The bios are not submitted directly after building them as otherwise
> +	 * a very fast I/O completion on an earlier bio could clear the page
> +	 * writeback bit before the subsequent bios are even submitted.
> +	 */
> +	btrfs_submit_pending_bios(&bio_list, ret);
> +
>  	if (ret) {
>  		u32 remaining = end + 1 - cur;
>  
> @@ -2243,7 +2273,7 @@ void extent_readahead(struct readahead_control *rac)
>  
>  	if (em_cached)
>  		free_extent_map(em_cached);
> -	submit_one_bio(&bio_ctrl);
> +	submit_one_bio(&bio_ctrl, NULL);
>  }
>  
>  /*
> -- 
> 2.39.2
>

Patch

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fada7a1931b130..48a49c57daa6fd 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -105,7 +105,8 @@  struct btrfs_bio_ctrl {
 	struct writeback_control *wbc;
 };
 
-static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
+static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl,
+			   struct bio_list *bio_list)
 {
 	struct btrfs_bio *bbio = bio_ctrl->bbio;
 
@@ -118,6 +119,8 @@  static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
 	if (btrfs_op(&bbio->bio) == BTRFS_MAP_READ &&
 	    bio_ctrl->compress_type != BTRFS_COMPRESS_NONE)
 		btrfs_submit_compressed_read(bbio);
+	else if (bio_list)
+		bio_list_add(bio_list, &bbio->bio);
 	else
 		btrfs_submit_bio(bbio, 0);
 
@@ -141,7 +144,22 @@  static void submit_write_bio(struct btrfs_bio_ctrl *bio_ctrl, int ret)
 		/* The bio is owned by the end_io handler now */
 		bio_ctrl->bbio = NULL;
 	} else {
-		submit_one_bio(bio_ctrl);
+		submit_one_bio(bio_ctrl, NULL);
+	}
+}
+
+static void btrfs_submit_pending_bios(struct bio_list *bio_list, int ret)
+{
+	struct bio *bio;
+
+	if (ret) {
+		blk_status_t status = errno_to_blk_status(ret);
+
+		while ((bio = bio_list_pop(bio_list)))
+			btrfs_bio_end_io(btrfs_bio(bio), status);
+	} else {
+		while ((bio = bio_list_pop(bio_list)))
+			btrfs_submit_bio(btrfs_bio(bio), 0);
 	}
 }
 
@@ -791,7 +809,8 @@  static void alloc_new_bio(struct btrfs_inode *inode,
  */
 static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
 			       u64 disk_bytenr, struct page *page,
-			       size_t size, unsigned long pg_offset)
+			       size_t size, unsigned long pg_offset,
+			       struct bio_list *bio_list)
 {
 	struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
 
@@ -800,7 +819,7 @@  static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
 
 	if (bio_ctrl->bbio &&
 	    !btrfs_bio_is_contig(bio_ctrl, page, disk_bytenr, pg_offset))
-		submit_one_bio(bio_ctrl);
+		submit_one_bio(bio_ctrl, bio_list);
 
 	do {
 		u32 len = size;
@@ -820,7 +839,7 @@  static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
 
 		if (bio_add_page(&bio_ctrl->bbio->bio, page, len, pg_offset) != len) {
 			/* bio full: move on to a new one */
-			submit_one_bio(bio_ctrl);
+			submit_one_bio(bio_ctrl, bio_list);
 			continue;
 		}
 
@@ -834,7 +853,7 @@  static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
 
 		/* Ordered extent boundary: move on to a new bio. */
 		if (bio_ctrl->len_to_oe_boundary == 0)
-			submit_one_bio(bio_ctrl);
+			submit_one_bio(bio_ctrl, bio_list);
 	} while (size);
 }
 
@@ -1082,14 +1101,14 @@  static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
 		}
 
 		if (bio_ctrl->compress_type != compress_type) {
-			submit_one_bio(bio_ctrl);
+			submit_one_bio(bio_ctrl, NULL);
 			bio_ctrl->compress_type = compress_type;
 		}
 
 		if (force_bio_submit)
-			submit_one_bio(bio_ctrl);
+			submit_one_bio(bio_ctrl, NULL);
 		submit_extent_page(bio_ctrl, disk_bytenr, page, iosize,
-				   pg_offset);
+				   pg_offset, NULL);
 		cur = cur + iosize;
 		pg_offset += iosize;
 	}
@@ -1113,7 +1132,7 @@  int btrfs_read_folio(struct file *file, struct folio *folio)
 	 * If btrfs_do_readpage() failed we will want to submit the assembled
 	 * bio to do the cleanup.
 	 */
-	submit_one_bio(&bio_ctrl);
+	submit_one_bio(&bio_ctrl, NULL);
 	return ret;
 }
 
@@ -1287,6 +1306,7 @@  static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 	u64 extent_offset;
 	u64 block_start;
 	struct extent_map *em;
+	struct bio_list bio_list = BIO_EMPTY_LIST;
 	int ret = 0;
 	int nr = 0;
 
@@ -1365,7 +1385,7 @@  static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 		btrfs_page_clear_dirty(fs_info, page, cur, iosize);
 
 		submit_extent_page(bio_ctrl, disk_bytenr, page, iosize,
-				   cur - page_offset(page));
+				   cur - page_offset(page), &bio_list);
 		cur += iosize;
 		nr++;
 	}
@@ -1378,6 +1398,16 @@  static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 		set_page_writeback(page);
 		end_page_writeback(page);
 	}
+
+	/*
+	 * Submit all bios we queued up for this page.
+	 *
+	 * The bios are not submitted directly after building them as otherwise
+	 * a very fast I/O completion on an earlier bio could clear the page
+	 * writeback bit before the subsequent bios are even submitted.
+	 */
+	btrfs_submit_pending_bios(&bio_list, ret);
+
 	if (ret) {
 		u32 remaining = end + 1 - cur;
 
@@ -2243,7 +2273,7 @@  void extent_readahead(struct readahead_control *rac)
 
 	if (em_cached)
 		free_extent_map(em_cached);
-	submit_one_bio(&bio_ctrl);
+	submit_one_bio(&bio_ctrl, NULL);
 }
 
 /*