@@ -24,6 +24,7 @@
#include "accessors.h"
#include "file-item.h"
#include "scrub.h"
+#include "bio.h"
/*
* This is only the first step towards a full-features scrub. It reads all
@@ -416,12 +417,44 @@ struct scrub2_stripe *alloc_scrub2_stripe(struct btrfs_fs_info *fs_info,
if (!stripe->csums)
goto cleanup;
}
+ stripe->bg = bg;
return stripe;
cleanup:
free_scrub2_stripe(stripe);
return NULL;
}
+/*
+ * Allocate a new stripe for the same block group as @src and copy over the
+ * information needed to read and verify another mirror of the same range.
+ *
+ * Copied: the logical bytenr, the csum buffer (when @src has one), the used
+ * sector bitmap and, for every used sector, the metadata flag plus either the
+ * generation (metadata) or the csum pointer (data that has a csum).
+ * Page contents and error bitmaps are not copied.
+ *
+ * Return the new stripe, or NULL if the allocation failed.  The caller
+ * releases it with free_scrub2_stripe().
+ */
+static struct scrub2_stripe *clone_scrub2_stripe(struct btrfs_fs_info *fs_info,
+						 const struct scrub2_stripe *src)
+{
+	struct scrub2_stripe *dst;
+	int sector_nr;
+
+	dst = alloc_scrub2_stripe(fs_info, src->bg);
+	if (!dst)
+		return NULL;
+
+	/*
+	 * alloc_scrub2_stripe() is only given the block group, so the range
+	 * covered by the stripe has to be inherited from @src.  Without this,
+	 * a read submitted for the clone would use the wrong logical bytenr.
+	 */
+	dst->logical = src->logical;
+
+	if (src->csums)
+		memcpy(dst->csums, src->csums,
+		       src->nr_sectors * fs_info->csum_size);
+	bitmap_copy(&dst->used_sector_bitmap, &src->used_sector_bitmap,
+		    src->nr_sectors);
+	for_each_set_bit(sector_nr, &src->used_sector_bitmap, src->nr_sectors) {
+		dst->sectors[sector_nr].is_metadata =
+			src->sectors[sector_nr].is_metadata;
+		/* For meta, copy the generation number. */
+		if (src->sectors[sector_nr].is_metadata) {
+			dst->sectors[sector_nr].generation =
+				src->sectors[sector_nr].generation;
+			continue;
+		}
+		/*
+		 * For data, only update csum pointer if there is data csum.
+		 * The pointer must reference @dst's own csum buffer, not
+		 * @src's.
+		 */
+		if (src->sectors[sector_nr].csum)
+			dst->sectors[sector_nr].csum = dst->csums +
+				sector_nr * fs_info->csum_size;
+	}
+	return dst;
+}
+
static struct scrub_block *alloc_scrub_block(struct scrub_ctx *sctx,
struct btrfs_device *dev,
u64 logical, u64 physical,
@@ -3750,6 +3783,10 @@ static void scrub2_verify_one_sector(struct scrub2_stripe *stripe,
if (test_bit(sector_nr, &stripe->used_sector_bitmap))
return;
+ /* IO error, no need to check. */
+ if (test_bit(sector_nr, &stripe->io_error_bitmap))
+ return;
+
/* Metadata, verify the full tree block. */
if (sector->is_metadata) {
/*
@@ -3785,7 +3822,7 @@ static void scrub2_verify_one_sector(struct scrub2_stripe *stripe,
}
}
-void scrub2_verify_one_stripe(struct scrub2_stripe *stripe)
+static void scrub2_verify_one_stripe(struct scrub2_stripe *stripe)
{
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
const unsigned int sectors_per_tree = fs_info->nodesize >>
@@ -3810,6 +3847,125 @@ void scrub2_verify_one_stripe(struct scrub2_stripe *stripe)
stripe->nr_sectors);
}
+/*
+ * End I/O callback for the read bio built by scrub2_read_and_wait_stripe().
+ *
+ * A failed bio flags every sector of the stripe in both the I/O error and the
+ * initial error bitmaps.  The bio reference is dropped, and the waiter on
+ * @io_wait is woken once the last pending I/O of the stripe has finished.
+ */
+static void scrub2_read_endio(struct btrfs_bio *bbio)
+{
+	struct bio *bio = &bbio->bio;
+	struct scrub2_stripe *stripe = bbio->private;
+	const int nr_sectors = stripe->nr_sectors;
+
+	if (bio->bi_status) {
+		bitmap_set(&stripe->io_error_bitmap, 0, nr_sectors);
+		bitmap_set(&stripe->init_error_bitmap, 0, nr_sectors);
+	}
+	bio_put(bio);
+
+	/* Only the completion of the last pending I/O wakes the waiter. */
+	if (!atomic_dec_and_test(&stripe->pending_io))
+		return;
+	wake_up(&stripe->io_wait);
+}
+
+/*
+ * Read the whole stripe (BTRFS_STRIPE_LEN bytes) from mirror
+ * @stripe->mirror_num into @stripe->pages, wait for the read to finish, then
+ * verify the stripe.
+ *
+ * The caller must have set a mirror number >= 1 and must not have any I/O
+ * pending on the stripe.
+ */
+static void scrub2_read_and_wait_stripe(struct scrub2_stripe *stripe)
+{
+	const unsigned int nr_pages = BTRFS_STRIPE_LEN >> PAGE_SHIFT;
+	struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
+	struct bio *bio;
+	unsigned int pg;
+
+	ASSERT(stripe->mirror_num >= 1);
+	ASSERT(atomic_read(&stripe->pending_io) == 0);
+
+	bio = btrfs_bio_alloc(nr_pages, REQ_OP_READ, scrub2_read_endio, stripe);
+	/* Backed by mempool, should not fail. */
+	ASSERT(bio);
+	bio->bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT;
+
+	/* Attach every page of the stripe, each one in full. */
+	for (pg = 0; pg < nr_pages; pg++) {
+		int added = bio_add_page(bio, stripe->pages[pg], PAGE_SIZE, 0);
+
+		ASSERT(added == PAGE_SIZE);
+	}
+
+	/* Account for the bio before submission; the endio drops the count. */
+	atomic_inc(&stripe->pending_io);
+	btrfs_submit_bio(fs_info, bio, stripe->mirror_num);
+	wait_event(stripe->io_wait, atomic_read(&stripe->pending_io) == 0);
+
+	scrub2_verify_one_stripe(stripe);
+}
+
+/*
+ * Read mirror @mirror_num into the scratch stripe @repair, and use every
+ * sector that verified fine there to fix the matching bad sector of @orig.
+ *
+ * A sector is copied only when it is used, still flagged in
+ * @orig->current_error_bitmap, and not flagged in
+ * @repair->current_error_bitmap after the read.  Its bit in
+ * @orig->current_error_bitmap is cleared afterwards.
+ */
+static void scrub2_repair_from_mirror(struct scrub2_stripe *orig,
+				      struct scrub2_stripe *repair,
+				      int mirror_num)
+{
+	struct btrfs_fs_info *fs_info = orig->bg->fs_info;
+	const int nr_sectors = orig->nr_sectors;
+	int nr;
+
+	/* Start from a clean state, @repair is reused for every mirror. */
+	bitmap_zero(&repair->current_error_bitmap, nr_sectors);
+	bitmap_zero(&repair->io_error_bitmap, nr_sectors);
+	bitmap_zero(&repair->csum_error_bitmap, nr_sectors);
+	bitmap_zero(&repair->meta_error_bitmap, nr_sectors);
+
+	repair->mirror_num = mirror_num;
+	scrub2_read_and_wait_stripe(repair);
+
+	for_each_set_bit(nr, &orig->used_sector_bitmap, nr_sectors) {
+		const int byte_off = nr << fs_info->sectorsize_bits;
+		const int page_index = byte_off >> PAGE_SHIFT;
+		const int pgoff = offset_in_page(byte_off);
+
+		/* Nothing to fix for this sector. */
+		if (!test_bit(nr, &orig->current_error_bitmap))
+			continue;
+		/* This mirror is bad for the sector as well. */
+		if (test_bit(nr, &repair->current_error_bitmap))
+			continue;
+
+		/* Copy the repaired content. */
+		memcpy_page(orig->pages[page_index], pgoff,
+			    repair->pages[page_index], pgoff,
+			    fs_info->sectorsize);
+		/*
+		 * A bit cleared in @current_error_bitmap tells later code
+		 * that the sector needs to be written back.
+		 */
+		clear_bit(nr, &orig->current_error_bitmap);
+	}
+}
+
+/*
+ * Try to repair the bad sectors of @stripe from the other mirrors.
+ *
+ * @stripe must have been read and verified exactly once, so that its
+ * @init_error_bitmap and @current_error_bitmap match.
+ *
+ * The remaining mirrors are tried one at a time, starting with the one right
+ * after @stripe->mirror_num and wrapping around, until either all bad
+ * sectors are repaired or every other mirror has been tried.  Repaired
+ * sectors get their content copied into @stripe->pages and their bit
+ * cleared in @stripe->current_error_bitmap; sectors which could not be
+ * repaired keep their bit set.
+ *
+ * If the scratch stripe can not be allocated, nothing is repaired and
+ * @stripe is left untouched.
+ */
+void scrub2_repair_one_stripe(struct scrub2_stripe *stripe)
+{
+	struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
+	struct scrub2_stripe *repair;
+	int nr_copies;
+	int i;
+
+	/*
+	 * The stripe should only have been verified once, thus its init and
+	 * current error bitmap should match.
+	 */
+	ASSERT(bitmap_equal(&stripe->current_error_bitmap,
+			    &stripe->init_error_bitmap, stripe->nr_sectors));
+
+	/* The stripe has no error from the beginning. */
+	if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors))
+		return;
+
+	nr_copies = btrfs_num_copies(fs_info, stripe->logical,
+				     fs_info->sectorsize);
+	/* No extra mirrors to go. */
+	if (nr_copies <= 1)
+		return;
+
+	repair = clone_scrub2_stripe(fs_info, stripe);
+	/* Out of memory, leave the errors recorded as unrepaired. */
+	if (!repair)
+		return;
+
+	/* Iterate all other copies. */
+	for (i = 0; i < nr_copies - 1; i++) {
+		/*
+		 * Mirror numbers are 1-based, in the range [1, nr_copies].
+		 * Step i picks the (i + 1)-th mirror after the original one,
+		 * wrapping from nr_copies back to 1, so the already failed
+		 * mirror and the invalid mirror 0 are never selected.
+		 */
+		const int next_mirror =
+			(stripe->mirror_num + i) % nr_copies + 1;
+
+		scrub2_repair_from_mirror(stripe, repair, next_mirror);
+
+		/* Already repaired all bad sectors. */
+		if (bitmap_empty(&stripe->current_error_bitmap,
+				 stripe->nr_sectors))
+			break;
+	}
+	free_scrub2_stripe(repair);
+}
+
/*
* Scrub one range which can only has simple mirror based profile.
* (Including all range in SINGLE/DUP/RAID1/RAID1C*, and each stripe in
@@ -25,6 +25,6 @@ int scrub2_find_fill_first_stripe(struct btrfs_root *extent_root,
struct btrfs_block_group *bg,
u64 logical_start, u64 logical_len,
struct scrub2_stripe *stripe);
-void scrub2_verify_one_stripe(struct scrub2_stripe *stripe);
+void scrub2_repair_one_stripe(struct scrub2_stripe *stripe);
#endif
The new helper, scrub2_repair_one_stripe(), does the following work to
repair one scrub2_stripe:

- Go through each remaining mirror
  - Submit a BTRFS_STRIPE_LEN read for the target mirror
  - Run verification for the above read
  - Copy repaired sectors back to the original stripe,
    and clear the current_error_bitmap bit for the original stripe.
  - Check if we repaired all the sectors

This is a little different from the original repair behavior:

- We only try the next mirror if the current mirror can not repair all
  sectors.
  The old behavior is to submit reads concurrently for all the
  remaining mirrors.

I'd prefer to put the parallel read into the new scrub_fs interface
instead.

For the current per-device scrub, the sequential repair only makes a
difference for RAID1C* and RAID6. Thus I'd prefer cleaner code instead.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/scrub.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++-
 fs/btrfs/scrub.h |   2 +-
 2 files changed, 158 insertions(+), 2 deletions(-)