@@ -582,10 +582,6 @@ static void scrub_sector_get(struct scrub_sector *sector);
static void scrub_sector_put(struct scrub_sector *sector);
static void scrub_parity_get(struct scrub_parity *sparity);
static void scrub_parity_put(struct scrub_parity *sparity);
-static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
- u64 physical, struct btrfs_device *dev, u64 flags,
- u64 gen, int mirror_num, u8 *csum,
- u64 physical_for_dev_replace);
static void scrub_bio_end_io(struct bio *bio);
static void scrub_bio_end_io_worker(struct work_struct *work);
static void scrub_block_complete(struct scrub_block *sblock);
@@ -2975,22 +2971,16 @@ static void scrub_sector_put(struct scrub_sector *sector)
kfree(sector);
}
-/*
- * Throttling of IO submission, bandwidth-limit based, the timeslice is 1
- * second. Limit can be set via /sys/fs/UUID/devinfo/devid/scrub_speed_max.
- */
-static void scrub_throttle(struct scrub_ctx *sctx)
+static void scrub_throttle_dev_io(struct scrub_ctx *sctx,
+ struct btrfs_device *device,
+ unsigned int bio_size)
{
const int time_slice = 1000;
- struct scrub_bio *sbio;
- struct btrfs_device *device;
s64 delta;
ktime_t now;
u32 div;
u64 bwlimit;
- sbio = sctx->bios[sctx->curr];
- device = sbio->dev;
bwlimit = READ_ONCE(device->scrub_speed_max);
if (bwlimit == 0)
return;
@@ -3012,7 +3002,7 @@ static void scrub_throttle(struct scrub_ctx *sctx)
/* Still in the time to send? */
if (ktime_before(now, sctx->throttle_deadline)) {
/* If current bio is within the limit, send it */
- sctx->throttle_sent += sbio->bio->bi_iter.bi_size;
+ sctx->throttle_sent += bio_size;
if (sctx->throttle_sent <= div_u64(bwlimit, div))
return;
@@ -3034,6 +3024,17 @@ static void scrub_throttle(struct scrub_ctx *sctx)
sctx->throttle_deadline = 0;
}
+/*
+ * Throttle the current bio against its device's bandwidth limit, timeslice is
+ * 1 second. Set the limit via /sys/fs/btrfs/UUID/devinfo/devid/scrub_speed_max.
+ */
+static void scrub_throttle(struct scrub_ctx *sctx)
+{
+ struct scrub_bio *sbio = sctx->bios[sctx->curr];
+
+ scrub_throttle_dev_io(sctx, sbio->dev, sbio->bio->bi_iter.bi_size);
+}
+
static void scrub_submit(struct scrub_ctx *sctx)
{
struct scrub_bio *sbio;
@@ -3118,202 +3119,6 @@ static int scrub_add_sector_to_rd_bio(struct scrub_ctx *sctx,
return 0;
}
-static void scrub_missing_raid56_end_io(struct bio *bio)
-{
- struct scrub_block *sblock = bio->bi_private;
- struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
-
- btrfs_bio_counter_dec(fs_info);
- if (bio->bi_status)
- sblock->no_io_error_seen = 0;
-
- bio_put(bio);
-
- queue_work(fs_info->scrub_workers, &sblock->work);
-}
-
-static void scrub_missing_raid56_worker(struct work_struct *work)
-{
- struct scrub_block *sblock = container_of(work, struct scrub_block, work);
- struct scrub_ctx *sctx = sblock->sctx;
- struct btrfs_fs_info *fs_info = sctx->fs_info;
- u64 logical;
- struct btrfs_device *dev;
-
- logical = sblock->logical;
- dev = sblock->dev;
-
- if (sblock->no_io_error_seen)
- scrub_recheck_block_checksum(sblock);
-
- if (!sblock->no_io_error_seen) {
- spin_lock(&sctx->stat_lock);
- sctx->stat.read_errors++;
- spin_unlock(&sctx->stat_lock);
- btrfs_err_rl_in_rcu(fs_info,
- "IO error rebuilding logical %llu for dev %s",
- logical, btrfs_dev_name(dev));
- } else if (sblock->header_error || sblock->checksum_error) {
- spin_lock(&sctx->stat_lock);
- sctx->stat.uncorrectable_errors++;
- spin_unlock(&sctx->stat_lock);
- btrfs_err_rl_in_rcu(fs_info,
- "failed to rebuild valid logical %llu for dev %s",
- logical, btrfs_dev_name(dev));
- } else {
- scrub_write_block_to_dev_replace(sblock);
- }
-
- if (sctx->is_dev_replace && sctx->flush_all_writes) {
- mutex_lock(&sctx->wr_lock);
- scrub_wr_submit(sctx);
- mutex_unlock(&sctx->wr_lock);
- }
-
- scrub_block_put(sblock);
- scrub_pending_bio_dec(sctx);
-}
-
-static void scrub_missing_raid56_pages(struct scrub_block *sblock)
-{
- struct scrub_ctx *sctx = sblock->sctx;
- struct btrfs_fs_info *fs_info = sctx->fs_info;
- u64 length = sblock->sector_count << fs_info->sectorsize_bits;
- u64 logical = sblock->logical;
- struct btrfs_io_context *bioc = NULL;
- struct bio *bio;
- struct btrfs_raid_bio *rbio;
- int ret;
- int i;
-
- btrfs_bio_counter_inc_blocked(fs_info);
- ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
- &length, &bioc);
- if (ret || !bioc)
- goto bioc_out;
-
- if (WARN_ON(!sctx->is_dev_replace ||
- !(bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
- /*
- * We shouldn't be scrubbing a missing device. Even for dev
- * replace, we should only get here for RAID 5/6. We either
- * managed to mount something with no mirrors remaining or
- * there's a bug in scrub_find_good_copy()/btrfs_map_block().
- */
- goto bioc_out;
- }
-
- bio = bio_alloc(NULL, BIO_MAX_VECS, REQ_OP_READ, GFP_NOFS);
- bio->bi_iter.bi_sector = logical >> 9;
- bio->bi_private = sblock;
- bio->bi_end_io = scrub_missing_raid56_end_io;
-
- rbio = raid56_alloc_missing_rbio(bio, bioc);
- if (!rbio)
- goto rbio_out;
-
- for (i = 0; i < sblock->sector_count; i++) {
- struct scrub_sector *sector = sblock->sectors[i];
-
- raid56_add_scrub_pages(rbio, scrub_sector_get_page(sector),
- scrub_sector_get_page_offset(sector),
- sector->offset + sector->sblock->logical);
- }
-
- INIT_WORK(&sblock->work, scrub_missing_raid56_worker);
- scrub_block_get(sblock);
- scrub_pending_bio_inc(sctx);
- raid56_submit_missing_rbio(rbio);
- btrfs_put_bioc(bioc);
- return;
-
-rbio_out:
- bio_put(bio);
-bioc_out:
- btrfs_bio_counter_dec(fs_info);
- btrfs_put_bioc(bioc);
- spin_lock(&sctx->stat_lock);
- sctx->stat.malloc_errors++;
- spin_unlock(&sctx->stat_lock);
-}
-
-static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
- u64 physical, struct btrfs_device *dev, u64 flags,
- u64 gen, int mirror_num, u8 *csum,
- u64 physical_for_dev_replace)
-{
- struct scrub_block *sblock;
- const u32 sectorsize = sctx->fs_info->sectorsize;
- int index;
-
- sblock = alloc_scrub_block(sctx, dev, logical, physical,
- physical_for_dev_replace, mirror_num);
- if (!sblock) {
- spin_lock(&sctx->stat_lock);
- sctx->stat.malloc_errors++;
- spin_unlock(&sctx->stat_lock);
- return -ENOMEM;
- }
-
- for (index = 0; len > 0; index++) {
- struct scrub_sector *sector;
- /*
- * Here we will allocate one page for one sector to scrub.
- * This is fine if PAGE_SIZE == sectorsize, but will cost
- * more memory for PAGE_SIZE > sectorsize case.
- */
- u32 l = min(sectorsize, len);
-
- sector = alloc_scrub_sector(sblock, logical);
- if (!sector) {
- spin_lock(&sctx->stat_lock);
- sctx->stat.malloc_errors++;
- spin_unlock(&sctx->stat_lock);
- scrub_block_put(sblock);
- return -ENOMEM;
- }
- sector->flags = flags;
- sector->generation = gen;
- if (csum) {
- sector->have_csum = 1;
- memcpy(sector->csum, csum, sctx->fs_info->csum_size);
- } else {
- sector->have_csum = 0;
- }
- len -= l;
- logical += l;
- physical += l;
- physical_for_dev_replace += l;
- }
-
- WARN_ON(sblock->sector_count == 0);
- if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
- /*
- * This case should only be hit for RAID 5/6 device replace. See
- * the comment in scrub_missing_raid56_pages() for details.
- */
- scrub_missing_raid56_pages(sblock);
- } else {
- for (index = 0; index < sblock->sector_count; index++) {
- struct scrub_sector *sector = sblock->sectors[index];
- int ret;
-
- ret = scrub_add_sector_to_rd_bio(sctx, sector);
- if (ret) {
- scrub_block_put(sblock);
- return ret;
- }
- }
-
- if (flags & BTRFS_EXTENT_FLAG_SUPER)
- scrub_submit(sctx);
- }
-
- /* last one frees, either here or in bio completion for last page */
- scrub_block_put(sblock);
- return 0;
-}
-
static void scrub_bio_end_io(struct bio *bio)
{
struct scrub_bio *sbio = bio->bi_private;
@@ -3498,179 +3303,6 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
return 1;
}
-static bool should_use_device(struct btrfs_fs_info *fs_info,
- struct btrfs_device *dev,
- bool follow_replace_read_mode)
-{
- struct btrfs_device *replace_srcdev = fs_info->dev_replace.srcdev;
- struct btrfs_device *replace_tgtdev = fs_info->dev_replace.tgtdev;
-
- if (!dev->bdev)
- return false;
-
- /*
- * We're doing scrub/replace, if it's pure scrub, no tgtdev should be
- * here. If it's replace, we're going to write data to tgtdev, thus
- * the current data of the tgtdev is all garbage, thus we can not use
- * it at all.
- */
- if (dev == replace_tgtdev)
- return false;
-
- /* No need to follow replace read mode, any existing device is fine. */
- if (!follow_replace_read_mode)
- return true;
-
- /* Need to follow the mode. */
- if (fs_info->dev_replace.cont_reading_from_srcdev_mode ==
- BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID)
- return dev != replace_srcdev;
- return true;
-}
-static int scrub_find_good_copy(struct btrfs_fs_info *fs_info,
- u64 extent_logical, u32 extent_len,
- u64 *extent_physical,
- struct btrfs_device **extent_dev,
- int *extent_mirror_num)
-{
- u64 mapped_length;
- struct btrfs_io_context *bioc = NULL;
- int ret;
- int i;
-
- mapped_length = extent_len;
- ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
- extent_logical, &mapped_length, &bioc, 0);
- if (ret || !bioc || mapped_length < extent_len) {
- btrfs_put_bioc(bioc);
- btrfs_err_rl(fs_info, "btrfs_map_block() failed for logical %llu: %d",
- extent_logical, ret);
- return -EIO;
- }
-
- /*
- * First loop to exclude all missing devices and the source device if
- * needed. And we don't want to use target device as mirror either, as
- * we're doing the replace, the target device range contains nothing.
- */
- for (i = 0; i < bioc->num_stripes - bioc->replace_nr_stripes; i++) {
- struct btrfs_io_stripe *stripe = &bioc->stripes[i];
-
- if (!should_use_device(fs_info, stripe->dev, true))
- continue;
- goto found;
- }
- /*
- * We didn't find any alternative mirrors, we have to break our replace
- * read mode, or we can not read at all.
- */
- for (i = 0; i < bioc->num_stripes - bioc->replace_nr_stripes; i++) {
- struct btrfs_io_stripe *stripe = &bioc->stripes[i];
-
- if (!should_use_device(fs_info, stripe->dev, false))
- continue;
- goto found;
- }
-
- btrfs_err_rl(fs_info, "failed to find any live mirror for logical %llu",
- extent_logical);
- return -EIO;
-
-found:
- *extent_physical = bioc->stripes[i].physical;
- *extent_mirror_num = i + 1;
- *extent_dev = bioc->stripes[i].dev;
- btrfs_put_bioc(bioc);
- return 0;
-}
-
-static bool scrub_need_different_mirror(struct scrub_ctx *sctx,
- struct map_lookup *map,
- struct btrfs_device *dev)
-{
- /*
- * For RAID56, all the extra mirrors are rebuilt from other P/Q,
- * cannot utilize other mirrors directly.
- */
- if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
- return false;
-
- if (!dev->bdev)
- return true;
-
- return sctx->fs_info->dev_replace.cont_reading_from_srcdev_mode ==
- BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID;
-}
-
-/* scrub extent tries to collect up to 64 kB for each bio */
-static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
- u64 logical, u32 len,
- u64 physical, struct btrfs_device *dev, u64 flags,
- u64 gen, int mirror_num)
-{
- struct btrfs_device *src_dev = dev;
- u64 src_physical = physical;
- int src_mirror = mirror_num;
- int ret;
- u8 csum[BTRFS_CSUM_SIZE];
- u32 blocksize;
-
- if (flags & BTRFS_EXTENT_FLAG_DATA) {
- if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
- blocksize = BTRFS_STRIPE_LEN;
- else
- blocksize = sctx->fs_info->sectorsize;
- spin_lock(&sctx->stat_lock);
- sctx->stat.data_extents_scrubbed++;
- sctx->stat.data_bytes_scrubbed += len;
- spin_unlock(&sctx->stat_lock);
- } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
- blocksize = BTRFS_STRIPE_LEN;
- else
- blocksize = sctx->fs_info->nodesize;
- spin_lock(&sctx->stat_lock);
- sctx->stat.tree_extents_scrubbed++;
- sctx->stat.tree_bytes_scrubbed += len;
- spin_unlock(&sctx->stat_lock);
- } else {
- blocksize = sctx->fs_info->sectorsize;
- WARN_ON(1);
- }
-
- /*
- * For dev-replace case, we can have @dev being a missing device, or
- * we want to avoid reading from the source device if possible.
- */
- if (sctx->is_dev_replace && scrub_need_different_mirror(sctx, map, dev)) {
- ret = scrub_find_good_copy(sctx->fs_info, logical, len,
- &src_physical, &src_dev, &src_mirror);
- if (ret < 0)
- return ret;
- }
- while (len) {
- u32 l = min(len, blocksize);
- int have_csum = 0;
-
- if (flags & BTRFS_EXTENT_FLAG_DATA) {
- /* push csums to sbio */
- have_csum = scrub_find_csum(sctx, logical, csum);
- if (have_csum == 0)
- ++sctx->stat.no_csum;
- }
- ret = scrub_sectors(sctx, logical, l, src_physical, src_dev,
- flags, gen, src_mirror,
- have_csum ? csum : NULL, physical);
- if (ret)
- return ret;
- len -= l;
- logical += l;
- physical += l;
- src_physical += l;
- }
- return 0;
-}
-
static int scrub_sectors_for_parity(struct scrub_parity *sparity,
u64 logical, u32 len,
u64 physical, struct btrfs_device *dev,
@@ -4253,20 +3885,6 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
return ret < 0 ? ret : 0;
}
-static void sync_replace_for_zoned(struct scrub_ctx *sctx)
-{
- if (!btrfs_is_zoned(sctx->fs_info))
- return;
-
- sctx->flush_all_writes = true;
- scrub_submit(sctx);
- mutex_lock(&sctx->wr_lock);
- scrub_wr_submit(sctx);
- mutex_unlock(&sctx->wr_lock);
-
- wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
-}
-
static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical,
u64 physical, u64 physical_end)
{
@@ -4514,6 +4132,9 @@ static void flush_scrub_stripes(struct scrub_ctx *sctx)
return;
ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &sctx->stripes[0].state));
+
+ scrub_throttle_dev_io(sctx, sctx->stripes[0].dev,
+ nr_stripes << BTRFS_STRIPE_LEN_SHIFT);
for (int i = 0; i < nr_stripes; i++) {
stripe = &sctx->stripes[i];
scrub_submit_initial_read(sctx, stripe);
@@ -4571,10 +4192,10 @@ static void flush_scrub_stripes(struct scrub_ctx *sctx)
sctx->cur_stripe = 0;
}
-int queue_scrub_stripe(struct scrub_ctx *sctx,
- struct btrfs_block_group *bg,
- struct btrfs_device *dev, int mirror_num,
- u64 logical, u32 length, u64 physical)
+static int queue_scrub_stripe(struct scrub_ctx *sctx,
+ struct btrfs_block_group *bg,
+ struct btrfs_device *dev, int mirror_num,
+ u64 logical, u32 length, u64 physical)
{
struct scrub_stripe *stripe;
int ret;
@@ -4612,11 +4233,8 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
u64 physical, int mirror_num)
{
struct btrfs_fs_info *fs_info = sctx->fs_info;
- struct btrfs_root *csum_root = btrfs_csum_root(fs_info, bg->start);
- struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bg->start);
const u64 logical_end = logical_start + logical_length;
/* An artificial limit, inherit from old scrub behavior */
- const u32 max_length = SZ_64K;
struct btrfs_path path = { 0 };
u64 cur_logical = logical_start;
int ret;
@@ -4628,11 +4246,7 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
path.skip_locking = 1;
/* Go through each extent items inside the logical range */
while (cur_logical < logical_end) {
- u64 extent_start;
- u64 extent_len;
- u64 extent_flags;
- u64 extent_gen;
- u64 scrub_len;
+ u64 cur_physical = physical + cur_logical - logical_start;
/* Canceled? */
if (atomic_read(&fs_info->scrub_cancel_req) ||
@@ -4662,8 +4276,9 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
}
spin_unlock(&bg->lock);
- ret = find_first_extent_item(extent_root, &path, cur_logical,
- logical_end - cur_logical);
+ ret = queue_scrub_stripe(sctx, bg, device, mirror_num,
+ cur_logical, logical_end - cur_logical,
+ cur_physical);
if (ret > 0) {
/* No more extent, just update the accounting */
sctx->stat.last_physical = physical + logical_length;
@@ -4672,52 +4287,11 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
}
if (ret < 0)
break;
- get_extent_info(&path, &extent_start, &extent_len,
- &extent_flags, &extent_gen);
- /* Skip hole range which doesn't have any extent */
- cur_logical = max(extent_start, cur_logical);
- /*
- * Scrub len has three limits:
- * - Extent size limit
- * - Scrub range limit
- * This is especially imporatant for RAID0/RAID10 to reuse
- * this function
- * - Max scrub size limit
- */
- scrub_len = min(min(extent_start + extent_len,
- logical_end), cur_logical + max_length) -
- cur_logical;
+ ASSERT(sctx->cur_stripe > 0);
+ cur_logical = sctx->stripes[sctx->cur_stripe - 1].logical
+ + BTRFS_STRIPE_LEN;
- if (extent_flags & BTRFS_EXTENT_FLAG_DATA) {
- ret = btrfs_lookup_csums_list(csum_root, cur_logical,
- cur_logical + scrub_len - 1,
- &sctx->csum_list, 1, false);
- if (ret)
- break;
- }
- if ((extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
- does_range_cross_boundary(extent_start, extent_len,
- logical_start, logical_length)) {
- btrfs_err(fs_info,
-"scrub: tree block %llu spanning boundaries, ignored. boundary=[%llu, %llu)",
- extent_start, logical_start, logical_end);
- spin_lock(&sctx->stat_lock);
- sctx->stat.uncorrectable_errors++;
- spin_unlock(&sctx->stat_lock);
- cur_logical += scrub_len;
- continue;
- }
- ret = scrub_extent(sctx, map, cur_logical, scrub_len,
- cur_logical - logical_start + physical,
- device, extent_flags, extent_gen,
- mirror_num);
- scrub_free_csums(sctx);
- if (ret)
- break;
- if (sctx->is_dev_replace)
- sync_replace_for_zoned(sctx);
- cur_logical += scrub_len;
/* Don't hold CPU for too long time */
cond_resched();
}
@@ -4802,7 +4376,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
int stripe_index)
{
struct btrfs_fs_info *fs_info = sctx->fs_info;
- struct blk_plug plug;
struct map_lookup *map = em->map_lookup;
const u64 profile = map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
const u64 chunk_logical = bg->start;
@@ -4824,12 +4397,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
atomic_read(&sctx->bios_in_flight) == 0);
scrub_blocked_if_needed(fs_info);
- /*
- * collect all data csums for the stripe to avoid seeking during
- * the scrub. This might currently (crc32) end up to be about 1MB
- */
- blk_start_plug(&plug);
-
if (sctx->is_dev_replace &&
btrfs_dev_is_sequential(sctx->wr_tgtdev, physical)) {
mutex_lock(&sctx->wr_lock);
@@ -4931,8 +4498,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
mutex_lock(&sctx->wr_lock);
scrub_wr_submit(sctx);
mutex_unlock(&sctx->wr_lock);
-
- blk_finish_plug(&plug);
+ flush_scrub_stripes(sctx);
if (sctx->is_dev_replace && ret >= 0) {
int ret2;
@@ -13,14 +13,4 @@ int btrfs_scrub_cancel_dev(struct btrfs_device *dev);
int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
struct btrfs_scrub_progress *progress);
-/*
- * The following functions are temporary exports to avoid warning on unused
- * static functions.
- */
-struct scrub_stripe;
-int queue_scrub_stripe(struct scrub_ctx *sctx,
- struct btrfs_block_group *bg,
- struct btrfs_device *dev, int mirror_num,
- u64 logical, u32 length, u64 physical);
-
#endif