[v5,09/13] btrfs: scrub: introduce the main read repair worker for scrub_stripe

Message ID	cb0877d338bcc7ccd3e89d4ba84ffd68ad3b7442.1679959770.git.wqu@suse.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <linux-btrfs-owner@vger.kernel.org> From: Qu Wenruo <wqu@suse.com> To: linux-btrfs@vger.kernel.org Cc: David Sterba <dsterba@suse.com> Subject: [PATCH v5 09/13] btrfs: scrub: introduce the main read repair worker for scrub_stripe Date: Tue, 28 Mar 2023 07:30:59 +0800 Message-Id: <cb0877d338bcc7ccd3e89d4ba84ffd68ad3b7442.1679959770.git.wqu@suse.com> In-Reply-To: <cover.1679959770.git.wqu@suse.com> References: <cover.1679959770.git.wqu@suse.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	btrfs: scrub: use a more reader friendly code to implement scrub_simple_mirror() | expand [v5,00/13] btrfs: scrub: use a more reader friendly code to implement scrub_simple_mirror() [v5,01/13] btrfs: scrub: use dedicated super block verification function to scrub one super block [v5,02/13] btrfs: introduce a new allocator for scrub specific btrfs_bio [v5,03/13] btrfs: introduce a new helper to submit read bio for scrub [v5,04/13] btrfs: introduce a new helper to submit write bio for scrub [v5,05/13] btrfs: scrub: introduce the structure for new BTRFS_STRIPE_LEN based interface [v5,06/13] btrfs: scrub: introduce a helper to find and fill the sector info for a scrub_stripe [v5,07/13] btrfs: scrub: introduce a helper to verify one metadata [v5,08/13] btrfs: scrub: introduce a helper to verify one scrub_stripe [v5,09/13] btrfs: scrub: introduce the main read repair worker for scrub_stripe [v5,10/13] btrfs: scrub: introduce a writeback helper for scrub_stripe [v5,11/13] btrfs: scrub: introduce error reporting functionality for scrub_stripe [v5,12/13] btrfs: scrub: introduce the helper to queue a stripe for scrub [v5,13/13] btrfs: scrub: switch scrub_simple_mirror() to scrub_stripe infrastructure

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 001972d9a3c4..0effda7bd1a8 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -121,6 +121,7 @@ struct scrub_stripe { atomic_t pending_io; wait_queue_head_t io_wait; + wait_queue_head_t repair_wait; /* * Indicates the states of the stripe. @@ -156,6 +157,8 @@ struct scrub_stripe { * group. */ u8 *csums; + + struct work_struct work; }; struct scrub_recover { @@ -381,6 +384,7 @@ int init_scrub_stripe(struct btrfs_fs_info *fs_info, struct scrub_stripe *stripe stripe->state = 0; init_waitqueue_head(&stripe->io_wait); + init_waitqueue_head(&stripe->repair_wait); atomic_set(&stripe->pending_io, 0); ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages); @@ -403,7 +407,7 @@ int init_scrub_stripe(struct btrfs_fs_info *fs_info, struct scrub_stripe *stripe return -ENOMEM; } -void wait_scrub_stripe_io(struct scrub_stripe *stripe) +static void wait_scrub_stripe_io(struct scrub_stripe *stripe) { wait_event(stripe->io_wait, atomic_read(&stripe->pending_io) == 0); } @@ -2339,7 +2343,8 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, } /* Verify specified sectors of a stripe. 
*/ -void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long bitmap) +static void scrub_verify_one_stripe(struct scrub_stripe *stripe, + unsigned long bitmap) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; const unsigned int sectors_per_tree = fs_info->nodesize >> @@ -2353,6 +2358,207 @@ void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long bitmap) } } +static int calc_sector_number(struct scrub_stripe *stripe, + struct bio_vec *first_bvec) +{ + int i; + + for (i = 0; i < stripe->nr_sectors; i++) { + if (scrub_stripe_get_page(stripe, i) == first_bvec->bv_page && + scrub_stripe_get_page_offset(stripe, i) == first_bvec->bv_offset) + break; + } + ASSERT(i < stripe->nr_sectors); + return i; +} + +/* + * Repair read is different to the regular read by: + * + * - Only reads the failed sectors + * - May have extra blocksize limits + */ +static void scrub_repair_read_endio(struct btrfs_bio *bbio) +{ + struct scrub_stripe *stripe = bbio->private; + struct btrfs_fs_info *fs_info = stripe->bg->fs_info; + struct bio_vec *bvec; + int sector_nr = calc_sector_number(stripe, + bio_first_bvec_all(&bbio->bio)); + int bio_size = 0; + int i; + + ASSERT(sector_nr < stripe->nr_sectors); + + bio_for_each_bvec_all(bvec, &bbio->bio, i) + bio_size += bvec->bv_len; + + if (bbio->bio.bi_status) { + bitmap_set(&stripe->io_error_bitmap, sector_nr, + bio_size >> fs_info->sectorsize_bits); + bitmap_set(&stripe->error_bitmap, sector_nr, + bio_size >> fs_info->sectorsize_bits); + } else { + bitmap_clear(&stripe->io_error_bitmap, sector_nr, + bio_size >> fs_info->sectorsize_bits); + } + bio_put(&bbio->bio); + if (atomic_dec_and_test(&stripe->pending_io)) + wake_up(&stripe->io_wait); +} + +static int calc_next_mirror(int mirror, int num_copies) +{ + ASSERT(mirror <= num_copies); + return (mirror + 1 > num_copies) ? 
1 : mirror + 1; +} + +static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe, + int mirror, int blocksize, + bool wait) +{ + struct btrfs_fs_info *fs_info = stripe->bg->fs_info; + struct btrfs_bio *bbio = NULL; + const unsigned long old_error_bitmap = stripe->error_bitmap; + int i; + + ASSERT(stripe->mirror_num >= 1); + ASSERT(atomic_read(&stripe->pending_io) == 0); + + for_each_set_bit(i, &old_error_bitmap, stripe->nr_sectors) { + struct page *page; + int pgoff; + int ret; + + page = scrub_stripe_get_page(stripe, i); + pgoff = scrub_stripe_get_page_offset(stripe, i); + + /* The current sector can not be merged, submit the bio. */ + if (bbio && ((i > 0 && !test_bit(i - 1, &stripe->error_bitmap)) || + bbio->bio.bi_iter.bi_size >= blocksize)) { + ASSERT(bbio->bio.bi_iter.bi_size); + atomic_inc(&stripe->pending_io); + btrfs_submit_scrub_read(bbio, mirror); + if (wait) + wait_scrub_stripe_io(stripe); + bbio = NULL; + } + + if (!bbio) { + bbio = btrfs_scrub_bio_alloc(REQ_OP_READ, fs_info, + scrub_repair_read_endio, stripe); + bbio->bio.bi_iter.bi_sector = (stripe->logical + + (i << fs_info->sectorsize_bits)) >> SECTOR_SHIFT; + } + + ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff); + ASSERT(ret == fs_info->sectorsize); + } + if (bbio) { + ASSERT(bbio->bio.bi_iter.bi_size); + atomic_inc(&stripe->pending_io); + btrfs_submit_scrub_read(bbio, mirror); + if (wait) + wait_scrub_stripe_io(stripe); + } +} + +/* + * The main entrance for all read related scrub work, including: + * + * - Wait for the initial read to finish + * - Verify and locate any bad sectors + * - Go through the remaining mirrors and try to read as large blocksize as + * possible + * + * - Go through all mirrors (including the failed mirror) sector-by-sector + * + * Writeback does not happen here, they need extra synchronization. 
+ */ +static void scrub_stripe_read_repair_worker(struct work_struct *work) +{ + struct scrub_stripe *stripe = container_of(work, struct scrub_stripe, + work); + struct btrfs_fs_info *fs_info = stripe->bg->fs_info; + int num_copies = btrfs_num_copies(fs_info, stripe->bg->start, + stripe->bg->length); + int mirror; + int i; + + ASSERT(stripe->mirror_num > 0); + + wait_scrub_stripe_io(stripe); + scrub_verify_one_stripe(stripe, stripe->extent_sector_bitmap); + /* Save the initial failed bitmap for later repair and report usage. */ + stripe->init_error_bitmap = stripe->error_bitmap; + + if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors)) + goto out; + + /* + * Try all remaining mirrors. + * + * Here we still try read as large block as possible, as this is faster + * and we have extra safe nets to rely on. + */ + for (mirror = calc_next_mirror(stripe->mirror_num, num_copies); + mirror != stripe->mirror_num; + mirror = calc_next_mirror(mirror, num_copies)) { + const unsigned long old_error_bitmap = stripe->error_bitmap; + + scrub_stripe_submit_repair_read(stripe, mirror, + BTRFS_STRIPE_LEN, false); + wait_scrub_stripe_io(stripe); + scrub_verify_one_stripe(stripe, old_error_bitmap); + if (bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors)) + goto out; + } + + /* + * Last safenet, try re-check all mirrors, including the failed one, + * sector-by-sector. + * + * As if one sector failed the drive's internal csum, the whole read + * containing the offending sector would be marked error. + * Thus here we do sector-by-sector read. + * + * This can be slow, thus we only try it as the last resort. 
+ */ + + for (i = 0, mirror = stripe->mirror_num; i < num_copies; + i++, mirror = calc_next_mirror(mirror, num_copies)) { + const unsigned long old_error_bitmap = stripe->error_bitmap; + + scrub_stripe_submit_repair_read(stripe, mirror, + fs_info->sectorsize, true); + wait_scrub_stripe_io(stripe); + scrub_verify_one_stripe(stripe, old_error_bitmap); + if (bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors)) + goto out; + } +out: + set_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state); + wake_up(&stripe->repair_wait); +} + +void scrub_read_endio(struct btrfs_bio *bbio) +{ + struct scrub_stripe *stripe = bbio->private; + + if (bbio->bio.bi_status) { + bitmap_set(&stripe->io_error_bitmap, 0, stripe->nr_sectors); + bitmap_set(&stripe->error_bitmap, 0, stripe->nr_sectors); + } else { + bitmap_clear(&stripe->io_error_bitmap, 0, stripe->nr_sectors); + } + bio_put(&bbio->bio); + if (atomic_dec_and_test(&stripe->pending_io)) { + wake_up(&stripe->io_wait); + INIT_WORK(&stripe->work, scrub_stripe_read_repair_worker); + queue_work(stripe->bg->fs_info->scrub_workers, &stripe->work); + } +} + static int scrub_checksum_tree_block(struct scrub_block *sblock) { struct scrub_ctx *sctx = sblock->sctx; diff --git a/fs/btrfs/scrub.h b/fs/btrfs/scrub.h index 45ff7e149806..bcc9d398fe07 100644 --- a/fs/btrfs/scrub.h +++ b/fs/btrfs/scrub.h @@ -19,11 +19,10 @@ int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid, */ struct scrub_stripe; int init_scrub_stripe(struct btrfs_fs_info *fs_info, struct scrub_stripe *stripe); -void wait_scrub_stripe_io(struct scrub_stripe *stripe); int scrub_find_fill_first_stripe(struct btrfs_block_group *bg, struct btrfs_device *dev, u64 physical, int mirror_num, u64 logical_start, u32 logical_len, struct scrub_stripe *stripe); -void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long bitmap); +void scrub_read_endio(struct btrfs_bio *bbio); #endif

[v5,09/13] btrfs: scrub: introduce the main read repair worker for scrub_stripe

Commit Message

Patch