@@ -70,6 +70,101 @@ struct scrub_ctx;
*/
#define BTRFS_MAX_MIRRORS (4 + 1)
+/*
+ * Represent one sector inside a scrub2_stripe.
+ * Contains all the info needed to verify the sector.
+ */
+struct scrub2_sector {
+ /*
+ * Tells which member of the union below applies: @generation for
+ * metadata sectors, @csum for data sectors.
+ * NOTE(review): inferred from the member comments, no user of this
+ * flag is visible here; confirm.
+ */
+ bool is_metadata;
+
+ union {
+ /*
+ * Csum pointer for data csum verification.
+ * Should point to a sector csum inside scrub2_stripe::csums.
+ *
+ * NULL if this data sector has no csum.
+ */
+ u8 *csum;
+
+ /*
+ * Extra info for metadata verification.
+ * All sectors inside a tree block share the same
+ * generation.
+ */
+ u64 generation;
+ };
+};
+
+/*
+ * Represent one continuous range with a length of BTRFS_STRIPE_LEN.
+ *
+ * Instances are created by alloc_scrub2_stripe() and released by
+ * free_scrub2_stripe().
+ */
+struct scrub2_stripe {
+ /*
+ * Block group this stripe belongs to.
+ * NOTE(review): inferred from the name; whether a reference is held on
+ * the block group is not visible here, confirm.
+ */
+ struct btrfs_block_group *bg;
+
+ /* Pages backing the whole BTRFS_STRIPE_LEN range. */
+ struct page *pages[BTRFS_STRIPE_LEN / PAGE_SIZE];
+ /* Array of @nr_sectors entries, one per sector of the stripe. */
+ struct scrub2_sector *sectors;
+
+ /*
+ * Logical address of the stripe.
+ * NOTE(review): presumably the start of the range; confirm.
+ */
+ u64 logical;
+ /*
+ * We use btrfs_submit_bio() infrastructure, thus logical + mirror_num
+ * is enough to locate one stripe.
+ */
+ u16 mirror_num;
+
+ /* Should be BTRFS_STRIPE_LEN / sectorsize. */
+ u16 nr_sectors;
+
+ /*
+ * Both are initialized by alloc_scrub2_stripe() (counter set to 0).
+ * NOTE(review): presumably @pending_io counts in-flight IO and waiters
+ * on @io_wait are woken when it drops to zero; no user is visible
+ * here, confirm.
+ */
+ atomic_t pending_io;
+ wait_queue_head_t io_wait;
+
+ /*
+ * NOTE(review): all bitmaps below are a single unsigned long,
+ * presumably one bit per sector, which assumes
+ * nr_sectors <= BITS_PER_LONG; confirm.
+ */
+
+ /* Indicates which sectors are covered by extent items. */
+ unsigned long used_sector_bitmap;
+
+ /*
+ * Records the errors found after the initial read.
+ * This will be used for repair, as any sector with an error needs
+ * repair (if a good copy is found).
+ */
+ unsigned long init_error_bitmap;
+
+ /*
+ * After reading another copy and verifying it, the bits of sectors
+ * that can be repaired will be cleared.
+ */
+ unsigned long current_error_bitmap;
+
+ /*
+ * The following error bitmaps are all for the initial read operation.
+ * After the initial read, we should not touch those error bitmaps, as
+ * they will later be used to do error reporting.
+ *
+ * Indicates IO errors during read.
+ */
+ unsigned long io_error_bitmap;
+
+ /* For both metadata and data. */
+ unsigned long csum_error_bitmap;
+
+ /*
+ * Indicates metadata specific errors.
+ * (from basic sanity checks to transid errors)
+ */
+ unsigned long meta_error_bitmap;
+
+ /*
+ * Checksums for the whole stripe if this stripe is inside a data block
+ * group.
+ */
+ u8 *csums;
+
+ /*
+ * Used to verify any tree block if this stripe is inside a meta block
+ * group.
+ * We reuse the same eb for all metadata of the same stripe.
+ */
+ struct extent_buffer *dummy_eb;
+};
+
struct scrub_recover {
refcount_t refs;
struct btrfs_io_context *bioc;
@@ -266,6 +361,67 @@ static void detach_scrub_page_private(struct page *page)
#endif
}
+/*
+ * Release a stripe and every buffer attached to it.
+ *
+ * Accepts NULL as well as a partially set up stripe: page slots and the
+ * dummy extent buffer that are still NULL are skipped.
+ */
+static void free_scrub2_stripe(struct scrub2_stripe *stripe)
+{
+	const int nr_pages = BTRFS_STRIPE_LEN >> PAGE_SHIFT;
+	int page_nr;
+
+	if (!stripe)
+		return;
+
+	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
+		struct page *page = stripe->pages[page_nr];
+
+		if (!page)
+			continue;
+		__free_page(page);
+	}
+
+	if (stripe->dummy_eb)
+		free_extent_buffer(stripe->dummy_eb);
+	kfree(stripe->csums);
+	kfree(stripe->sectors);
+	kfree(stripe);
+}
+
+/*
+ * Allocate a scrub2_stripe together with the buffers needed to read and
+ * verify one BTRFS_STRIPE_LEN range.
+ *
+ * @fs_info:	filesystem info, supplies sectorsize_bits and csum_size
+ * @bg:		block group the stripe is for; its flags decide whether the
+ *		dummy extent buffer (metadata) and/or the csum array (data)
+ *		get allocated
+ *
+ * The @bg pointer is recorded in the stripe without taking a reference.
+ * NOTE(review): callers are assumed to keep the block group alive for as
+ * long as the stripe exists; confirm.
+ *
+ * Return the new stripe, or NULL if any allocation failed. The stripe is
+ * released with free_scrub2_stripe().
+ */
+struct scrub2_stripe *alloc_scrub2_stripe(struct btrfs_fs_info *fs_info,
+					  struct btrfs_block_group *bg)
+{
+	struct scrub2_stripe *stripe;
+	int ret;
+
+	stripe = kzalloc(sizeof(*stripe), GFP_KERNEL);
+	if (!stripe)
+		return NULL;
+
+	init_waitqueue_head(&stripe->io_wait);
+	atomic_set(&stripe->pending_io, 0);
+
+	stripe->bg = bg;
+	stripe->nr_sectors = BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits;
+
+	ret = btrfs_alloc_page_array(BTRFS_STRIPE_LEN >> PAGE_SHIFT,
+				     stripe->pages);
+	if (ret < 0)
+		goto cleanup;
+
+	stripe->sectors = kcalloc(stripe->nr_sectors, sizeof(*stripe->sectors),
+				  GFP_KERNEL);
+	if (!stripe->sectors)
+		goto cleanup;
+
+	if (bg->flags & BTRFS_BLOCK_GROUP_METADATA) {
+		stripe->dummy_eb = alloc_dummy_extent_buffer(fs_info, 0);
+		if (!stripe->dummy_eb)
+			goto cleanup;
+	}
+	if (bg->flags & BTRFS_BLOCK_GROUP_DATA) {
+		/* One csum of csum_size bytes per sector of the stripe. */
+		stripe->csums = kcalloc(stripe->nr_sectors, fs_info->csum_size,
+					GFP_KERNEL);
+		if (!stripe->csums)
+			goto cleanup;
+	}
+	return stripe;
+
+cleanup:
+	/*
+	 * The stripe was zero-allocated, so anything not set up yet is still
+	 * NULL and free_scrub2_stripe() copes with that.
+	 */
+	free_scrub2_stripe(stripe);
+	return NULL;
+}
+
static struct scrub_block *alloc_scrub_block(struct scrub_ctx *sctx,
struct btrfs_device *dev,
u64 logical, u64 physical,
@@ -13,4 +13,10 @@ int btrfs_scrub_cancel_dev(struct btrfs_device *dev);
int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
struct btrfs_scrub_progress *progress);
+/*
+ * The following functions are temporary exports to avoid warnings about
+ * unused static functions.
+ *
+ * alloc_scrub2_stripe() returns a newly allocated stripe, or NULL if an
+ * allocation failed.
+ */
+struct scrub2_stripe *alloc_scrub2_stripe(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group *bg);
#endif
These new structures will have "scrub2_" as their prefix, along with the alloc and free functions for them. The basic idea is that we keep the existing per-device scrub behavior, but get rid of the bio form shaping by always reading the full BTRFS_STRIPE_LEN range. This means we will read some sectors which are not scrub targets, but that's fine. At write back time we still only submit repaired sectors. With every read submitted in BTRFS_STRIPE_LEN units, there should not be much need for a complex bio form shaping mechanism. Signed-off-by: Qu Wenruo <wqu@suse.com> --- fs/btrfs/scrub.c | 156 +++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/scrub.h | 6 ++ 2 files changed, 162 insertions(+)