| Message ID | 8fc39e8af0ce2c335f0e41bdd8b616b30ac2d7b9.1686977659.git.wqu@suse.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | btrfs: scrub: introduce SCRUB_LOGICAL flag |
On 17.06.23 07:08, Qu Wenruo wrote:
> Introduce a new helper, prepare_scrub_block_group(), to handle the
> checks before scrubbing a block group.
>
> The helper would be called by both scrub_enumerate_chunks() and
> scrub_logical_chunks().

I'd personally split out the scrub_logical_chunks() part into a new
patch to make it easier to read.
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 598754ad7ce6..aa68cda5226f 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2349,6 +2349,141 @@ static int finish_extent_writes_for_zoned(struct btrfs_root *root,
         return btrfs_commit_transaction(trans);
 }
 
+/*
+ * Do the preparation before we scrub the block group.
+ *
+ * Return >0 if we don't need to scrub the block group.
+ * Return 0 if we have done the preparation and set @ro_set_ret.
+ * Return <0 for error and can not scrub the bg.
+ */
+static int prepare_scrub_block_group(struct scrub_ctx *sctx,
+                                     struct btrfs_block_group *bg,
+                                     bool *ro_set_ret)
+{
+        struct btrfs_fs_info *fs_info = sctx->fs_info;
+        struct btrfs_root *root = fs_info->dev_root;
+        bool ro_set = false;
+        int ret;
+
+        if (sctx->is_dev_replace && btrfs_is_zoned(fs_info)) {
+                if (!test_bit(BLOCK_GROUP_FLAG_TO_COPY, &bg->runtime_flags))
+                        return 1;
+        }
+
+        /*
+         * Make sure that while we are scrubbing the corresponding block
+         * group doesn't get its logical address and its device extents
+         * reused for another block group, which can possibly be of a
+         * different type and different profile. We do this to prevent
+         * false error detections and crashes due to bogus attempts to
+         * repair extents.
+         */
+        spin_lock(&bg->lock);
+        if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &bg->runtime_flags)) {
+                spin_unlock(&bg->lock);
+                return 1;
+        }
+        btrfs_freeze_block_group(bg);
+        spin_unlock(&bg->lock);
+
+        /*
+         * we need call btrfs_inc_block_group_ro() with scrubs_paused,
+         * to avoid deadlock caused by:
+         * btrfs_inc_block_group_ro()
+         * -> btrfs_wait_for_commit()
+         * -> btrfs_commit_transaction()
+         * -> btrfs_scrub_pause()
+         */
+        scrub_pause_on(fs_info);
+
+        /*
+         * Don't do chunk preallocation for scrub.
+         *
+         * This is especially important for SYSTEM bgs, or we can hit
+         * -EFBIG from btrfs_finish_chunk_alloc() like:
+         * 1. The only SYSTEM bg is marked RO.
+         *    Since SYSTEM bg is small, that's pretty common.
+         * 2. New SYSTEM bg will be allocated
+         *    Due to regular version will allocate new chunk.
+         * 3. New SYSTEM bg is empty and will get cleaned up
+         *    Before cleanup really happens, it's marked RO again.
+         * 4. Empty SYSTEM bg get scrubbed
+         *    We go back to 2.
+         *
+         * This can easily boost the amount of SYSTEM chunks if cleaner
+         * thread can't be triggered fast enough, and use up all space
+         * of btrfs_super_block::sys_chunk_array
+         *
+         * While for dev replace, we need to try our best to mark block
+         * group RO, to prevent race between:
+         * - Write duplication
+         *   Contains latest data
+         * - Scrub copy
+         *   Contains data from commit tree
+         *
+         * If target block group is not marked RO, nocow writes can
+         * be overwritten by scrub copy, causing data corruption.
+         * So for dev-replace, it's not allowed to continue if a block
+         * group is not RO.
+         */
+        ret = btrfs_inc_block_group_ro(bg, sctx->is_dev_replace);
+        if (!ret && sctx->is_dev_replace) {
+                ret = finish_extent_writes_for_zoned(root, bg);
+                if (ret) {
+                        btrfs_dec_block_group_ro(bg);
+                        scrub_pause_off(fs_info);
+                        return ret;
+                }
+        }
+
+        if (ret == 0) {
+                ro_set = 1;
+        } else if (ret == -ENOSPC && !sctx->is_dev_replace &&
+                   !(bg->flags & BTRFS_BLOCK_GROUP_RAID56_MASK)) {
+                /*
+                 * btrfs_inc_block_group_ro return -ENOSPC when it
+                 * failed in creating new chunk for metadata.
+                 * It is not a problem for scrub, because
+                 * metadata are always cowed, and our scrub paused
+                 * commit_transactions.
+                 *
+                 * For RAID56 chunks, we have to mark them read-only
+                 * for scrub, as later we would use our own cache
+                 * out of RAID56 realm.
+                 * Thus we want the RAID56 bg to be marked RO to
+                 * prevent RMW from screwing up out cache.
+                 */
+                ro_set = 0;
+        } else if (ret == -ETXTBSY) {
+                btrfs_warn(fs_info,
+                           "skipping scrub of block group %llu due to active swapfile",
+                           bg->start);
+                btrfs_unfreeze_block_group(bg);
+                scrub_pause_off(fs_info);
+                return ret;
+        } else {
+                btrfs_warn(fs_info,
+                           "failed setting block group ro: %d", ret);
+                btrfs_unfreeze_block_group(bg);
+                scrub_pause_off(fs_info);
+                return ret;
+        }
+
+        /*
+         * Now the target block is marked RO, wait for nocow writes to
+         * finish before dev-replace.
+         * COW is fine, as COW never overwrites extents in commit tree.
+         */
+        if (sctx->is_dev_replace) {
+                btrfs_wait_nocow_writers(bg);
+                btrfs_wait_ordered_roots(fs_info, U64_MAX, bg->start,
+                                         bg->length);
+        }
+        scrub_pause_off(fs_info);
+        *ro_set_ret = ro_set;
+        return 0;
+}
+
 static noinline_for_stack
 int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                            struct btrfs_device *scrub_dev, u64 start, u64 end)
@@ -2359,7 +2494,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
         struct btrfs_root *root = fs_info->dev_root;
         u64 chunk_offset;
         int ret = 0;
-        int ro_set;
         int slot;
         struct extent_buffer *l;
         struct btrfs_key key;
@@ -2380,6 +2514,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
         key.type = BTRFS_DEV_EXTENT_KEY;
 
         while (1) {
+                bool ro_set = false;
                 u64 dev_extent_len;
 
                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -2461,127 +2596,20 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                         goto skip;
                 }
 
-                if (sctx->is_dev_replace && btrfs_is_zoned(fs_info)) {
-                        if (!test_bit(BLOCK_GROUP_FLAG_TO_COPY, &cache->runtime_flags)) {
-                                btrfs_put_block_group(cache);
-                                goto skip;
-                        }
+                ret = prepare_scrub_block_group(sctx, cache, &ro_set);
+                if (ret == -ETXTBSY) {
+                        ret = 0;
+                        goto skip_unfreeze;
                 }
-
-                /*
-                 * Make sure that while we are scrubbing the corresponding block
-                 * group doesn't get its logical address and its device extents
-                 * reused for another block group, which can possibly be of a
-                 * different type and different profile. We do this to prevent
-                 * false error detections and crashes due to bogus attempts to
-                 * repair extents.
-                 */
-                spin_lock(&cache->lock);
-                if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &cache->runtime_flags)) {
-                        spin_unlock(&cache->lock);
+                if (ret < 0) {
+                        btrfs_put_block_group(cache);
+                        break;
+                }
+                if (ret > 0) {
                         btrfs_put_block_group(cache);
                         goto skip;
                 }
-                btrfs_freeze_block_group(cache);
-                spin_unlock(&cache->lock);
 
-                /*
-                 * we need call btrfs_inc_block_group_ro() with scrubs_paused,
-                 * to avoid deadlock caused by:
-                 * btrfs_inc_block_group_ro()
-                 * -> btrfs_wait_for_commit()
-                 * -> btrfs_commit_transaction()
-                 * -> btrfs_scrub_pause()
-                 */
-                scrub_pause_on(fs_info);
-
-                /*
-                 * Don't do chunk preallocation for scrub.
-                 *
-                 * This is especially important for SYSTEM bgs, or we can hit
-                 * -EFBIG from btrfs_finish_chunk_alloc() like:
-                 * 1. The only SYSTEM bg is marked RO.
-                 *    Since SYSTEM bg is small, that's pretty common.
-                 * 2. New SYSTEM bg will be allocated
-                 *    Due to regular version will allocate new chunk.
-                 * 3. New SYSTEM bg is empty and will get cleaned up
-                 *    Before cleanup really happens, it's marked RO again.
-                 * 4. Empty SYSTEM bg get scrubbed
-                 *    We go back to 2.
-                 *
-                 * This can easily boost the amount of SYSTEM chunks if cleaner
-                 * thread can't be triggered fast enough, and use up all space
-                 * of btrfs_super_block::sys_chunk_array
-                 *
-                 * While for dev replace, we need to try our best to mark block
-                 * group RO, to prevent race between:
-                 * - Write duplication
-                 *   Contains latest data
-                 * - Scrub copy
-                 *   Contains data from commit tree
-                 *
-                 * If target block group is not marked RO, nocow writes can
-                 * be overwritten by scrub copy, causing data corruption.
-                 * So for dev-replace, it's not allowed to continue if a block
-                 * group is not RO.
-                 */
-                ret = btrfs_inc_block_group_ro(cache, sctx->is_dev_replace);
-                if (!ret && sctx->is_dev_replace) {
-                        ret = finish_extent_writes_for_zoned(root, cache);
-                        if (ret) {
-                                btrfs_dec_block_group_ro(cache);
-                                scrub_pause_off(fs_info);
-                                btrfs_put_block_group(cache);
-                                break;
-                        }
-                }
-
-                if (ret == 0) {
-                        ro_set = 1;
-                } else if (ret == -ENOSPC && !sctx->is_dev_replace &&
-                           !(cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK)) {
-                        /*
-                         * btrfs_inc_block_group_ro return -ENOSPC when it
-                         * failed in creating new chunk for metadata.
-                         * It is not a problem for scrub, because
-                         * metadata are always cowed, and our scrub paused
-                         * commit_transactions.
-                         *
-                         * For RAID56 chunks, we have to mark them read-only
-                         * for scrub, as later we would use our own cache
-                         * out of RAID56 realm.
-                         * Thus we want the RAID56 bg to be marked RO to
-                         * prevent RMW from screwing up out cache.
-                         */
-                        ro_set = 0;
-                } else if (ret == -ETXTBSY) {
-                        btrfs_warn(fs_info,
-                                   "skipping scrub of block group %llu due to active swapfile",
-                                   cache->start);
-                        scrub_pause_off(fs_info);
-                        ret = 0;
-                        goto skip_unfreeze;
-                } else {
-                        btrfs_warn(fs_info,
-                                   "failed setting block group ro: %d", ret);
-                        btrfs_unfreeze_block_group(cache);
-                        btrfs_put_block_group(cache);
-                        scrub_pause_off(fs_info);
-                        break;
-                }
-
-                /*
-                 * Now the target block is marked RO, wait for nocow writes to
-                 * finish before dev-replace.
-                 * COW is fine, as COW never overwrites extents in commit tree.
-                 */
-                if (sctx->is_dev_replace) {
-                        btrfs_wait_nocow_writers(cache);
-                        btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start,
-                                                 cache->length);
-                }
-
-                scrub_pause_off(fs_info);
                 down_write(&dev_replace->rwsem);
                 dev_replace->cursor_right = found_key.offset + dev_extent_len;
                 dev_replace->cursor_left = found_key.offset;
@@ -2960,6 +2988,69 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
         return ret;
 }
 
+static int scrub_logical_chunks(struct scrub_ctx *sctx, u64 start, u64 end)
+{
+        struct btrfs_fs_info *fs_info = sctx->fs_info;
+        u64 cur = start;
+        int ret = 0;
+
+        while (cur < end) {
+                struct btrfs_block_group *bg;
+                bool ro_set = false;
+
+                bg = btrfs_lookup_first_block_group(fs_info, cur);
+
+                /* No more block groups. */
+                if (!bg)
+                        break;
+                if (bg->start > end)
+                        break;
+
+                ret = prepare_scrub_block_group(sctx, bg, &ro_set);
+                if (ret == -ETXTBSY) {
+                        ret = 0;
+                        goto next;
+                }
+                if (ret > 0)
+                        goto next;
+                if (ret < 0) {
+                        btrfs_put_block_group(bg);
+                        break;
+                }
+
+                /* The real work starts here. */
+                ret = -EOPNOTSUPP;
+
+                if (ro_set)
+                        btrfs_dec_block_group_ro(bg);
+                /*
+                 * We might have prevented the cleaner kthread from deleting
+                 * this block group if it was already unused because we raced
+                 * and set it to RO mode first. So add it back to the unused
+                 * list, otherwise it might not ever be deleted unless a manual
+                 * balance is triggered or it becomes used and unused again.
+                 */
+                spin_lock(&bg->lock);
+                if (!test_bit(BLOCK_GROUP_FLAG_REMOVED, &bg->runtime_flags) &&
+                    !bg->ro && bg->reserved == 0 && bg->used == 0) {
+                        spin_unlock(&bg->lock);
+                        if (btrfs_test_opt(fs_info, DISCARD_ASYNC))
+                                btrfs_discard_queue_work(&fs_info->discard_ctl, bg);
+                        else
+                                btrfs_mark_bg_unused(bg);
+                } else {
+                        spin_unlock(&bg->lock);
+                }
+                btrfs_unfreeze_block_group(bg);
+next:
+                cur = bg->start + bg->length;
+                btrfs_put_block_group(bg);
+                if (ret < 0)
+                        break;
+        }
+        return ret;
+}
+
 int btrfs_scrub_logical(struct btrfs_fs_info *fs_info, u64 start, u64 end,
                         struct btrfs_scrub_progress *progress, bool readonly)
 {
@@ -3006,8 +3097,7 @@ int btrfs_scrub_logical(struct btrfs_fs_info *fs_info, u64 start, u64 end,
         atomic_inc(&fs_info->scrubs_logical_running);
         mutex_unlock(&fs_info->scrub_lock);
 
-        /* The main work would be implemented. */
-        ret = -EOPNOTSUPP;
+        ret = scrub_logical_chunks(sctx, start, end);
 
         atomic_dec(&fs_info->scrubs_running);
         atomic_dec(&fs_info->scrubs_logical_running);
Introduce a new helper, prepare_scrub_block_group(), to handle the
checks before scrubbing a block group.

The helper would be called by both scrub_enumerate_chunks() and
scrub_logical_chunks().

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/scrub.c | 328 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 209 insertions(+), 119 deletions(-)
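For readers following the thread, the contract of the new helper is easiest to see from the caller's side: a positive return means "skip this block group", a negative return means the helper has already undone its own setup (with -ETXTBSY downgraded to a skip by the callers), and zero means the block group is frozen and possibly read-only, so the caller owns the btrfs_dec_block_group_ro()/btrfs_unfreeze_block_group() cleanup. The sketch below is only an illustration of that contract, not code from the patch or the kernel tree; it condenses the scrub_logical_chunks() loop, drops the unused-block-group rework for brevity, and uses a hypothetical scrub_one_bg() placeholder for the per-block-group work that a later patch in the series would add.

```c
/*
 * Illustrative sketch of how a caller consumes prepare_scrub_block_group().
 * scrub_one_bg() is a hypothetical placeholder, not a real kernel function.
 */
static int scrub_logical_range_sketch(struct scrub_ctx *sctx, u64 start, u64 end)
{
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	u64 cur = start;
	int ret = 0;

	while (cur < end) {
		struct btrfs_block_group *bg;
		bool ro_set = false;

		bg = btrfs_lookup_first_block_group(fs_info, cur);
		if (!bg || bg->start > end)
			break;

		ret = prepare_scrub_block_group(sctx, bg, &ro_set);
		if (ret > 0 || ret == -ETXTBSY) {
			/* Nothing to scrub here (or an active swapfile); move on. */
			ret = 0;
		} else if (ret < 0) {
			/* Hard error; the helper already unfroze the block group. */
			btrfs_put_block_group(bg);
			break;
		} else {
			ret = scrub_one_bg(sctx, bg);	/* hypothetical per-bg work */

			/* Undo what the preparation helper set up. */
			if (ro_set)
				btrfs_dec_block_group_ro(bg);
			btrfs_unfreeze_block_group(bg);
		}

		cur = bg->start + bg->length;
		btrfs_put_block_group(bg);
		if (ret < 0)
			break;
	}
	return ret;
}
```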