diff mbox series

[v2,04/15] btrfs: look at full bi_io_vec for repair decision

Message ID fbfeded0671dbb2ef682478aebd5693f96fbdeb6.1587072977.git.osandov@fb.com (mailing list archive)
State New, archived
Headers show
Series btrfs: read repair/direct I/O improvements | expand

Commit Message

Omar Sandoval April 16, 2020, 9:46 p.m. UTC
From: Omar Sandoval <osandov@fb.com>

Read repair does two things: it finds a good copy of data to return to
the reader, and it corrects the bad copy on disk. If a read of multiple
sectors has an I/O error, repair does an extra "validation" step that
issues a separate read for each sector. This allows us to find the exact
failing sectors and only rewrite those.

This heuristic is implemented in
bio_readpage_error()/btrfs_check_repairable() as:

	failed_bio_pages = failed_bio->bi_iter.bi_size >> PAGE_SHIFT;
	if (failed_bio_pages > 1)
		do validation

However, at this point, bi_iter may have already been advanced. This
means that we'll skip the validation step and rewrite the entire failed
read.

Fix it by getting the actual size from the biovec (which we can do
because this is only called for non-cloned bios, although that will
change in a later commit).

Fixes: 8a2ee44a371c ("btrfs: look at bi_size for repair decisions")
Signed-off-by: Omar Sandoval <osandov@fb.com>
---
 fs/btrfs/extent_io.c | 33 +++++++++++++++++++++++++++------
 fs/btrfs/extent_io.h |  5 +++--
 2 files changed, 30 insertions(+), 8 deletions(-)

Comments

Johannes Thumshirn April 17, 2020, 5:56 p.m. UTC | #1
Looks good,
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
diff mbox series

Patch

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 39e45b8a5031..712f49607d3a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2537,8 +2537,9 @@  int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
 	return 0;
 }
 
-bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
-			   struct io_failure_record *failrec, int failed_mirror)
+bool btrfs_check_repairable(struct inode *inode, bool need_validation,
+			    struct io_failure_record *failrec,
+			    int failed_mirror)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	int num_copies;
@@ -2561,7 +2562,7 @@  bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
 	 *	a) deliver good data to the caller
 	 *	b) correct the bad sectors on disk
 	 */
-	if (failed_bio_pages > 1) {
+	if (need_validation) {
 		/*
 		 * to fulfill b), we need to know the exact failing sectors, as
 		 * we don't want to rewrite any more than the failed ones. thus,
@@ -2633,6 +2634,24 @@  struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
 	return bio;
 }
 
+static bool btrfs_io_needs_validation(struct inode *inode, struct bio *bio)
+{
+	struct bio_vec *bvec;
+	u64 len = 0;
+	int i;
+
+	/*
+	 * We need to validate each sector individually if the failed I/O was
+	 * for multiple sectors.
+	 */
+	bio_for_each_bvec_all(bvec, bio, i) {
+		len += bvec->bv_len;
+		if (len > inode->i_sb->s_blocksize)
+			return true;
+	}
+	return false;
+}
+
 /*
  * This is a generic handler for readpage errors. If other copies exist, read
  * those and write back good data to the failed position. Does not investigate
@@ -2647,11 +2666,11 @@  static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 	struct inode *inode = page->mapping->host;
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
+	bool need_validation;
 	struct bio *bio;
 	int read_mode = 0;
 	blk_status_t status;
 	int ret;
-	unsigned failed_bio_pages = failed_bio->bi_iter.bi_size >> PAGE_SHIFT;
 
 	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
 
@@ -2659,13 +2678,15 @@  static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 	if (ret)
 		return ret;
 
-	if (!btrfs_check_repairable(inode, failed_bio_pages, failrec,
+	need_validation = btrfs_io_needs_validation(inode, failed_bio);
+
+	if (!btrfs_check_repairable(inode, need_validation, failrec,
 				    failed_mirror)) {
 		free_io_failure(failure_tree, tree, failrec);
 		return -EIO;
 	}
 
-	if (failed_bio_pages > 1)
+	if (need_validation)
 		read_mode |= REQ_FAILFAST_DEV;
 
 	phy_offset >>= inode->i_sb->s_blocksize_bits;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 2ed65bd0760e..26c0fce0bb64 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -312,8 +312,9 @@  struct io_failure_record {
 };
 
 
-bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
-			    struct io_failure_record *failrec, int fail_mirror);
+bool btrfs_check_repairable(struct inode *inode, bool need_validation,
+			    struct io_failure_record *failrec,
+			    int failed_mirror);
 struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
 				    struct io_failure_record *failrec,
 				    struct page *page, int pg_offset, int icsum,