diff mbox series

[4/7] btrfs: add new read repair infrastructure

Message ID c79f35aea568ff3c1aa9b68b1bd6ea923d44e72a.1653270322.git.wqu@suse.com (mailing list archive)
State New, archived
Headers show
Series btrfs: synchronous (but super simple) read-repair rework | expand

Commit Message

Qu Wenruo May 23, 2022, 1:48 a.m. UTC
The new infrastructure only has one function,
btrfs_read_repair_sector(), which will try to get the correct content of
that sector.

The idea of the function is very straight-forward:

1) Try to read the next mirror (if possible)
2) Verify the csum (if the sector has one)
3) Go back to 1) if csum mismatch or read failed

All the bio submission is synchronous, meaning we will wait for the
submitted bio to finish before continuing.

This can be a performance bottleneck, but considering that:

- Read-repair is already a cold path
- More than one corruption in one read bio is even rarer

I don't think we should spend tons of code on a very cold path, not
to mention that complex code itself can be bug-prone and harder to maintain.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/Makefile      |  2 +-
 fs/btrfs/read-repair.c | 74 ++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/read-repair.h | 13 ++++++++
 3 files changed, 88 insertions(+), 1 deletion(-)
 create mode 100644 fs/btrfs/read-repair.c
 create mode 100644 fs/btrfs/read-repair.h
diff mbox series

Patch

diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 99f9995670ea..0b2605c750ca 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -31,7 +31,7 @@  btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
 	   uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
 	   block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
-	   subpage.o tree-mod-log.o
+	   subpage.o tree-mod-log.o read-repair.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/read-repair.c b/fs/btrfs/read-repair.c
new file mode 100644
index 000000000000..e3175e27bcbb
--- /dev/null
+++ b/fs/btrfs/read-repair.c
@@ -0,0 +1,74 @@ 
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bio.h>
+#include "ctree.h"
+#include "volumes.h"
+#include "read-repair.h"
+#include "btrfs_inode.h"
+
+/*
+ * Return the mirror number that follows @cur_mirror, wrapping around once
+ * the result would exceed @num_copies.
+ */
+static int get_next_mirror(int cur_mirror, int num_copies)
+{
+	int next;
+
+	/* Read-repair never uses 0 as a mirror number. */
+	ASSERT(cur_mirror);
+
+	next = cur_mirror + 1;
+	if (next > num_copies)
+		next -= num_copies;
+	return next;
+}
+
+/*
+ * Return the mirror number that precedes @cur_mirror, wrapping around to
+ * @num_copies when stepping back would yield a value of 0 or less.
+ */
+static int get_prev_mirror(int cur_mirror, int num_copies)
+{
+	/* Read-repair never uses 0 as a mirror number. */
+	ASSERT(cur_mirror);
+
+	if (cur_mirror <= 1)
+		return num_copies;
+	return cur_mirror - 1;
+}
+
+/*
+ * Try to get good content for one sector by reading it from other mirrors.
+ *
+ * Starting with the mirror after @failed_mirror, the sector at @logical is
+ * read synchronously into @page at @pgoff, one mirror at a time.  A mirror is
+ * accepted when the read succeeds and, if @expected_csum is given, the sector
+ * checksum matches.  Once a good copy is found it is written back to every
+ * mirror that was found bad on the way, from the one right before the good
+ * mirror back to @failed_mirror.
+ *
+ * @inode:          inode the sector belongs to
+ * @page:           page that receives the sector content
+ * @pgoff:          offset of the sector inside @page
+ * @logical:        logical bytenr of the sector
+ * @file_off:       file offset, passed on to btrfs_repair_io_failure()
+ * @failed_mirror:  mirror number of the read that already failed (never 0)
+ * @num_copies:     number of mirrors available for this sector
+ * @expected_csum:  checksum to verify against, or NULL to skip verification
+ *
+ * Return 0 if @page now holds good content, -EIO if there is no other mirror
+ * or none of them produced a good copy.
+ */
+int btrfs_read_repair_sector(struct inode *inode,
+			     struct page *page, unsigned int pgoff,
+			     u64 logical, u64 file_off, int failed_mirror,
+			     int num_copies, u8 *expected_csum)
+{
+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	/* Mirrors known to be bad so far; starts with @failed_mirror itself. */
+	int nr_bad = 1;
+	int i;
+
+	/* No more mirrors to retry. */
+	if (num_copies <= 1)
+		return -EIO;
+
+	for (i = get_next_mirror(failed_mirror, num_copies); i != failed_mirror;
+	     i = get_next_mirror(i, num_copies), nr_bad++) {
+		u8 csum[BTRFS_CSUM_SIZE];
+		struct bio *read_bio;
+		int bad_mirror;
+		int ret;
+
+		read_bio = bio_alloc(NULL, 1, REQ_OP_READ | REQ_SYNC, GFP_NOFS);
+		if (!read_bio)
+			return -EIO;
+		__bio_add_page(read_bio, page, fs_info->sectorsize, pgoff);
+		read_bio->bi_iter.bi_sector = logical >> SECTOR_SHIFT;
+
+		ret = btrfs_map_bio_wait(fs_info, read_bio, i);
+		/*
+		 * The bio is only needed for this one synchronous read, drop
+		 * our reference so it is not leaked on every retry.
+		 * NOTE(review): assumes btrfs_map_bio_wait() leaves the
+		 * reference with the caller -- confirm against its definition.
+		 */
+		bio_put(read_bio);
+
+		/* Submit failed, try next mirror. */
+		if (ret < 0)
+			continue;
+
+		/* Csum mismatch, this mirror is bad as well, try next one. */
+		if (expected_csum &&
+		    btrfs_check_sector_csum(fs_info, page, pgoff, csum,
+					    expected_csum))
+			continue;
+
+		/*
+		 * Mirror @i is good.  Walk backwards and rewrite each of the
+		 * @nr_bad mirrors we went through, ending at @failed_mirror.
+		 * The repair is best effort: @page already has good content,
+		 * so a failed write back does not change the return value.
+		 */
+		for (bad_mirror = get_prev_mirror(i, num_copies); nr_bad > 0;
+		     bad_mirror = get_prev_mirror(bad_mirror, num_copies),
+		     nr_bad--)
+			btrfs_repair_io_failure(fs_info,
+					btrfs_ino(BTRFS_I(inode)), file_off,
+					fs_info->sectorsize, logical, page,
+					pgoff, bad_mirror);
+		return 0;
+	}
+
+	/* Every other mirror was tried and none of them was good. */
+	return -EIO;
+}
diff --git a/fs/btrfs/read-repair.h b/fs/btrfs/read-repair.h
new file mode 100644
index 000000000000..e984ab0b5b18
--- /dev/null
+++ b/fs/btrfs/read-repair.h
@@ -0,0 +1,13 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef BTRFS_READ_REPAIR_H
+#define BTRFS_READ_REPAIR_H
+
+#include <linux/blk_types.h>
+#include <linux/fs.h>
+
+int btrfs_read_repair_sector(struct inode *inode,
+			     struct page *page, unsigned int pgoff,
+			     u64 logical, u64 file_off, int failed_mirror,
+			     int num_copies, u8 *expected_csum);
+#endif