@@ -203,6 +203,11 @@ struct scrub_ctx {
#define SCRUB_FS_SECTOR_FLAG_META (1 << 2)
#define SCRUB_FS_SECTOR_FLAG_PARITY (1 << 3)
+/* Set when the sector belongs to a missing device. */
+#define SCRUB_FS_SECTOR_FLAG_DEV_MISSING (1 << 4)
+#define SCRUB_FS_SECTOR_FLAG_IO_ERROR (1 << 5)
+#define SCRUB_FS_SECTOR_FLAG_IO_DONE (1 << 6)
+
/*
* Represent a sector.
*
@@ -305,6 +310,8 @@ struct scrub_fs_ctx {
* would point to the same location inside the buffer.
*/
u8 *csum_buf;
+
+ wait_queue_head_t wait;
};

struct scrub_warning {
@@ -4559,6 +4566,7 @@ static struct scrub_fs_ctx *scrub_fs_alloc_ctx(struct btrfs_fs_info *fs_info,
sfctx->fs_info = fs_info;
sfctx->readonly = readonly;
atomic_set(&sfctx->sectors_under_io, 0);
+ init_waitqueue_head(&sfctx->wait);
return sfctx;
error:
kfree(sfctx);
@@ -4936,6 +4944,213 @@ static void scrub_fs_reset_stripe(struct scrub_fs_ctx *sfctx)
}
}
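
+/*
+ * Mark every sector of the given stripe (copy) as belonging to a
+ * missing device.
+ */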
+static void mark_missing_dev_sectors(struct scrub_fs_ctx *sfctx,
+ int stripe_nr)
+{
+ struct btrfs_fs_info *fs_info = sfctx->fs_info;
+ const int sectors_per_stripe = BTRFS_STRIPE_LEN >>
+ fs_info->sectorsize_bits;
+ int i;
+
+ for (i = 0; i < sectors_per_stripe; i++) {
+ struct scrub_fs_sector *sector =
+ scrub_fs_get_sector(sfctx, i, stripe_nr);
+
+ sector->flags |= SCRUB_FS_SECTOR_FLAG_DEV_MISSING;
+ }
+}
+
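+/*
+ * Return the page backing the given sector.
+ *
+ * The sector number indexes the whole sector buffer covering all copies
+ * of the stripe, thus its range is [0, nr_copies * sectors_per_stripe).
+ */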
+static struct page *scrub_fs_get_page(struct scrub_fs_ctx *sfctx,
+ int sector_nr)
+{
+ const int sectors_per_stripe = BTRFS_STRIPE_LEN >>
+ sfctx->fs_info->sectorsize_bits;
+ int page_index;
+
+ /* Basic checks to make sure we're accessing a valid sector. */
+ ASSERT(sector_nr >= 0 && sector_nr < sfctx->nr_copies * sectors_per_stripe);
+
+ page_index = sector_nr / (PAGE_SIZE >> sfctx->fs_info->sectorsize_bits);
+
+ ASSERT(sfctx->pages[page_index]);
+ return sfctx->pages[page_index];
+}
+
+static unsigned int scrub_fs_get_page_offset(struct scrub_fs_ctx *sfctx,
+ int sector_nr)
+{
+ const int sectors_per_stripe = BTRFS_STRIPE_LEN >>
+ sfctx->fs_info->sectorsize_bits;
+
+ /* Basic checks to make sure we're accessing a valid sector. */
+ ASSERT(sector_nr >= 0 && sector_nr < sfctx->nr_copies * sectors_per_stripe);
+
+ return offset_in_page(sector_nr << sfctx->fs_info->sectorsize_bits);
+}
+
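+/*
+ * Map the first page of a finished bio back to the stripe (copy) number
+ * it was submitted for, by searching sfctx->pages[].
+ *
+ * Returns -1 if the page doesn't belong to any stripe.
+ */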
+static int scrub_fs_get_stripe_nr(struct scrub_fs_ctx *sfctx,
+ struct page *first_page,
+ unsigned int page_off)
+{
+ const int pages_per_stripe = BTRFS_STRIPE_LEN >> PAGE_SHIFT;
+ bool found = false;
+ int i;
+
+ /* The first sector should always be page aligned. */
+ ASSERT(page_off == 0);
+
+ for (i = 0; i < pages_per_stripe * sfctx->nr_copies; i++) {
+ if (first_page == sfctx->pages[i]) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ return -1;
+
+ ASSERT(IS_ALIGNED(i, pages_per_stripe));
+
+ return i / pages_per_stripe;
+}
+
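+/*
+ * Read endio handler. Mark every sector of the stripe as either IO done
+ * or IO errored, then wake up the main scrub thread.
+ */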
+static void scrub_fs_read_endio(struct bio *bio)
+{
+ struct scrub_fs_ctx *sfctx = bio->bi_private;
+ struct btrfs_fs_info *fs_info = sfctx->fs_info;
+ const int sectors_per_stripe = BTRFS_STRIPE_LEN >>
+ fs_info->sectorsize_bits;
+ struct bio_vec *first_bvec = bio_first_bvec_all(bio);
+ struct bio_vec *bvec;
+ struct bvec_iter_all iter_all;
+ int bio_size = 0;
+ bool error = (bio->bi_status != BLK_STS_OK);
+ const int stripe_nr = scrub_fs_get_stripe_nr(sfctx, first_bvec->bv_page,
+ first_bvec->bv_offset);
+ int i;
+
+ /* Grab the bio size for later sanity checks. */
+ bio_for_each_segment_all(bvec, bio, iter_all)
+ bio_size += bvec->bv_len;
+
+ /* We always submit a bio for a stripe length. */
+ ASSERT(bio_size == BTRFS_STRIPE_LEN);
+
+ for (i = 0; i < sectors_per_stripe; i++) {
+ struct scrub_fs_sector *sector =
+ scrub_fs_get_sector(sfctx, i, stripe_nr);
+ /*
+ * Only set the sector flags here; don't update any stats, as that
+ * will be done by the main thread at verification time.
+ */
+ if (error)
+ sector->flags |= SCRUB_FS_SECTOR_FLAG_IO_ERROR;
+ else
+ sector->flags |= SCRUB_FS_SECTOR_FLAG_IO_DONE;
+ }
+ atomic_sub(bio_size >> fs_info->sectorsize_bits,
+ &sfctx->sectors_under_io);
+ wake_up(&sfctx->wait);
+ bio_put(bio);
+}
+
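+/*
+ * Submit one read bio covering a full stripe length for the given copy.
+ *
+ * For a missing device the sectors only get flagged, no bio is
+ * submitted.
+ */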
+static void submit_stripe_read_bio(struct scrub_fs_ctx *sfctx,
+ struct btrfs_io_context *bioc,
+ int stripe_nr)
+{
+ struct btrfs_fs_info *fs_info = sfctx->fs_info;
+ const int sectors_per_stripe = BTRFS_STRIPE_LEN >>
+ fs_info->sectorsize_bits;
+ struct btrfs_io_stripe *stripe = &bioc->stripes[stripe_nr];
+ struct btrfs_device *dev = stripe->dev;
+ struct bio *bio;
+ int ret;
+ int i;
+
+ /*
+ * The device is missing; just mark its sectors as missing and let
+ * the caller continue to the next copy.
+ */
+ if (!dev || !dev->bdev) {
+ mark_missing_dev_sectors(sfctx, stripe_nr);
+ return;
+ }
+
+ /* Submit a bio to read the stripe length. */
+ bio = bio_alloc(dev->bdev, BIO_MAX_VECS,
+ REQ_OP_READ | REQ_BACKGROUND, GFP_KERNEL);
+
+ /* The bio is backed by a mempool, thus the allocation should not fail. */
+ ASSERT(bio);
+
+ bio->bi_iter.bi_sector = stripe->physical >> SECTOR_SHIFT;
+ for (i = sectors_per_stripe * stripe_nr;
+ i < sectors_per_stripe * (stripe_nr + 1); i++) {
+ struct page *page = scrub_fs_get_page(sfctx, i);
+ unsigned int page_off = scrub_fs_get_page_offset(sfctx, i);
+
+ ret = bio_add_page(bio, page, fs_info->sectorsize, page_off);
+
+ /*
+ * This should not fail, as we add at most BTRFS_STRIPE_LEN / 4K
+ * (i.e. 16) sectors, way below BIO_MAX_VECS.
+ */
+ ASSERT(ret == fs_info->sectorsize);
+ }
+
+ bio->bi_private = sfctx;
+ bio->bi_end_io = scrub_fs_read_endio;
+ atomic_add(sectors_per_stripe, &sfctx->sectors_under_io);
+ submit_bio(bio);
+}
+
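+/*
+ * Read all copies of the stripe at sfctx->cur_logical and wait for the
+ * involved IO to finish.
+ */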
+static int scrub_fs_one_stripe(struct scrub_fs_ctx *sfctx)
+{
+ struct btrfs_fs_info *fs_info = sfctx->fs_info;
+ struct btrfs_io_context *bioc = NULL;
+ u64 mapped_len = BTRFS_STRIPE_LEN;
+ int i;
+ int ret;
+
+ /* We should be at a stripe start inside the current block group. */
+ ASSERT(sfctx->cur_bg->start <= sfctx->cur_logical &&
+ sfctx->cur_logical < sfctx->cur_bg->start +
+ sfctx->cur_bg->length);
+ ASSERT(IS_ALIGNED(sfctx->cur_logical - sfctx->cur_bg->start,
+ BTRFS_STRIPE_LEN));
+
+ btrfs_bio_counter_inc_blocked(fs_info);
+ ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
+ sfctx->cur_logical, &mapped_len, &bioc);
+ if (ret < 0)
+ goto out;
+
+ if (mapped_len < BTRFS_STRIPE_LEN) {
+ btrfs_err_rl(fs_info,
+ "got a short map for bytenr %llu, mapped length %llu expect %u",
+ sfctx->cur_logical, mapped_len, BTRFS_STRIPE_LEN);
+ ret = -EUCLEAN;
+ sfctx->stat.nr_fatal_errors++;
+ goto out;
+ }
+
+ if (bioc->num_stripes != sfctx->nr_copies) {
+ btrfs_err_rl(fs_info,
+ "unexpected number of stripes, got %d expect %d",
+ bioc->num_stripes, sfctx->nr_copies);
+ ret = -EUCLEAN;
+ sfctx->stat.nr_fatal_errors++;
+ goto out;
+ }
+
+ for (i = 0; i < sfctx->nr_copies; i++)
+ submit_stripe_read_bio(sfctx, bioc, i);
+ wait_event(sfctx->wait, atomic_read(&sfctx->sectors_under_io) == 0);
+
+ /* Placeholder for verifying the read data. */
+out:
+ btrfs_put_bioc(bioc);
+ btrfs_bio_counter_dec(fs_info);
+ return ret;
+}
+
static int scrub_fs_block_group(struct scrub_fs_ctx *sfctx,
struct btrfs_block_group *bg)
{
@@ -4982,8 +5197,9 @@ static int scrub_fs_block_group(struct scrub_fs_ctx *sfctx,
break;
}
- /* Place holder for real stripe scrubbing. */
- ret = 0;
+ ret = scrub_fs_one_stripe(sfctx);
+ if (ret < 0)
+ break;

/* Reset the stripe for next run. */
scrub_fs_reset_stripe(sfctx);
This patch introduces a helper, scrub_fs_one_stripe(). Currently it only
does the following work:

- Submit bios for each copy of the 64K stripe

  We don't need to skip any range which doesn't have data/metadata;
  skipping would only split one sequential read into several smaller
  ones and eat up the IOPS of the disk.

  At per-stripe initialization time we have marked all sectors unused,
  and the extent tree search later marks the needed sectors
  DATA/METADATA, so at verification time we can skip those unused
  sectors.

- Wait for the bios to finish

No csum verification yet.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/scrub.c | 220 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 218 insertions(+), 2 deletions(-)
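For reference, below is a rough sketch of how a later verification pass
could consume the per-sector flags set in this patch. It is not part of
the patch: scrub_fs_verify_one_stripe() is an assumed name, and
SCRUB_FS_SECTOR_FLAG_DATA is assumed to exist alongside the META flag
shown above.

static void scrub_fs_verify_one_stripe(struct scrub_fs_ctx *sfctx,
				       int stripe_nr)
{
	const int sectors_per_stripe = BTRFS_STRIPE_LEN >>
				       sfctx->fs_info->sectorsize_bits;
	int i;

	for (i = 0; i < sectors_per_stripe; i++) {
		struct scrub_fs_sector *sector =
			scrub_fs_get_sector(sfctx, i, stripe_nr);

		/* Never marked DATA/META by the extent tree search; skip. */
		if (!(sector->flags & (SCRUB_FS_SECTOR_FLAG_DATA |
				       SCRUB_FS_SECTOR_FLAG_META)))
			continue;

		/* Device missing or read failed; nothing to verify. */
		if (sector->flags & (SCRUB_FS_SECTOR_FLAG_DEV_MISSING |
				     SCRUB_FS_SECTOR_FLAG_IO_ERROR))
			continue;

		/* Csum/metadata verification would go here. */
	}
}

The design point is that the IO stays fully sequential (whole-stripe
reads), while the decision of what to verify is made purely in memory
from the per-sector flags.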