@@ -1845,6 +1845,7 @@ static void btrfs_init_scrub(struct btrfs_fs_info *fs_info)
{
mutex_init(&fs_info->scrub_lock);
atomic_set(&fs_info->scrubs_running, 0);
+ atomic_set(&fs_info->scrubs_logical_running, 0);
atomic_set(&fs_info->scrub_pause_req, 0);
atomic_set(&fs_info->scrubs_paused, 0);
atomic_set(&fs_info->scrub_cancel_req, 0);
@@ -631,7 +631,19 @@ struct btrfs_fs_info {
/* Private scrub information */
struct mutex scrub_lock;
+
+ /*
+ * Number of running scrubs, including both dev-scrubs (at most
+ * one dev-scrub on each device) and logical-scrubs (at most
+ * one logical-scrub for each fs).
+ */
atomic_t scrubs_running;
+
+ /*
+ * Number of running logical scrubs; there is at most one running
+ * logical scrub for each fs.
+ */
+ atomic_t scrubs_logical_running;
atomic_t scrub_pause_req;
atomic_t scrubs_paused;
atomic_t scrub_cancel_req;
@@ -3179,7 +3179,11 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
goto out;
}
- ret = btrfs_scrub_dev(fs_info, sa->devid, sa->start, sa->end,
+ if (sa->flags & BTRFS_SCRUB_LOGICAL)
+ ret = btrfs_scrub_logical(fs_info, sa->start, sa->end,
+ &sa->progress, sa->flags & BTRFS_SCRUB_READONLY);
+ else
+ ret = btrfs_scrub_dev(fs_info, sa->devid, sa->start, sa->end,
&sa->progress, sa->flags & BTRFS_SCRUB_READONLY,
0);
@@ -178,7 +178,8 @@ struct scrub_ctx {
int first_free;
int cur_stripe;
atomic_t cancel_req;
- int readonly;
+ bool readonly;
+ bool scrub_logical;
int sectors_per_bio;
/* Number of stripes we have in @stripes. */
@@ -2841,6 +2842,14 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
goto out;
}
+ /* Dev-scrub conflicts with logical-scrub. */
+ if (atomic_read(&fs_info->scrubs_logical_running)) {
+ mutex_unlock(&fs_info->scrub_lock);
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ ret = -EINPROGRESS;
+ goto out;
+ }
+
down_read(&fs_info->dev_replace.rwsem);
if (dev->scrub_ctx ||
(!is_dev_replace &&
@@ -2951,6 +2960,67 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
return ret;
}
+int btrfs_scrub_logical(struct btrfs_fs_info *fs_info, u64 start, u64 end,
+ struct btrfs_scrub_progress *progress, bool readonly)
+{
+ struct scrub_ctx *sctx;
+ int ret;
+
+ if (btrfs_fs_closing(fs_info))
+ return -EAGAIN;
+
+ /* At mount time we have ensured nodesize is in the range of [4K, 64K]. */
+ ASSERT(fs_info->nodesize <= BTRFS_STRIPE_LEN);
+
+ sctx = scrub_setup_ctx(fs_info, false);
+ if (IS_ERR(sctx))
+ return PTR_ERR(sctx);
+ sctx->scrub_logical = true;
+ sctx->readonly = readonly;
+
+ ret = scrub_workers_get(fs_info, false);
+ if (ret)
+ goto out_free_ctx;
+
+ /* Make sure we're the only running scrub. */
+ mutex_lock(&fs_info->scrub_lock);
+ if (atomic_read(&fs_info->scrubs_running)) {
+ mutex_unlock(&fs_info->scrub_lock);
+ ret = -EINPROGRESS;
+ goto out;
+ }
+ down_read(&fs_info->dev_replace.rwsem);
+ if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
+ up_read(&fs_info->dev_replace.rwsem);
+ mutex_unlock(&fs_info->scrub_lock);
+ ret = -EINPROGRESS;
+ goto out;
+ }
+ up_read(&fs_info->dev_replace.rwsem);
+ /*
+ * By checking @scrub_pause_req here, we can avoid a race between
+ * transaction commit and scrubbing.
+ */
+ __scrub_blocked_if_needed(fs_info);
+ atomic_inc(&fs_info->scrubs_running);
+ atomic_inc(&fs_info->scrubs_logical_running);
+ mutex_unlock(&fs_info->scrub_lock);
+
+ /* The main work will be implemented in later patches. */
+ ret = -EOPNOTSUPP;
+
+ atomic_dec(&fs_info->scrubs_running);
+ atomic_dec(&fs_info->scrubs_logical_running);
+ wake_up(&fs_info->scrub_pause_wait);
+ if (progress)
+ memcpy(progress, &sctx->stat, sizeof(*progress));
+out:
+ scrub_workers_put(fs_info);
+out_free_ctx:
+ scrub_free_ctx(sctx);
+ return ret;
+}
+
void btrfs_scrub_pause(struct btrfs_fs_info *fs_info)
{
mutex_lock(&fs_info->scrub_lock);
@@ -6,6 +6,8 @@
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
u64 end, struct btrfs_scrub_progress *progress,
int readonly, int is_dev_replace);
+int btrfs_scrub_logical(struct btrfs_fs_info *fs_info, u64 start, u64 end,
+ struct btrfs_scrub_progress *progress, bool readonly);
void btrfs_scrub_pause(struct btrfs_fs_info *fs_info);
void btrfs_scrub_continue(struct btrfs_fs_info *fs_info);
int btrfs_scrub_cancel(struct btrfs_fs_info *info);
@@ -186,8 +186,15 @@ struct btrfs_scrub_progress {
* Intermittent error. */
};
-#define BTRFS_SCRUB_READONLY 1
-#define BTRFS_SCRUB_SUPPORTED_FLAGS (BTRFS_SCRUB_READONLY)
+#define BTRFS_SCRUB_READONLY (1ULL << 0)
+
+/*
+ * Regular scrub is a per-device operation, while with this flag set,
+ * scrub is done in the logical address space and @devid is ignored.
+ */
+#define BTRFS_SCRUB_LOGICAL (1ULL << 1)
+#define BTRFS_SCRUB_SUPPORTED_FLAGS (BTRFS_SCRUB_READONLY |\
+ BTRFS_SCRUB_LOGICAL)
struct btrfs_ioctl_scrub_args {
__u64 devid; /* in */
__u64 start; /* in */
Currently btrfs scrub is a per-device operation. This is fine for most
non-RAID56 profiles, but not a good fit for RAID56 profiles.

The main challenge for RAID56 is that we will read out the data stripes
more than once (once for the data stripe scrub itself, and again for
the parities).

To address this, and maybe even improve the non-RAID56 scrub, here we
introduce a new scrub flag, SCRUB_LOGICAL.

This would be a per-fs operation, and conflicts with any
dev-scrub/dev-replace.

This patch only implements the basic exclusion checks.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/disk-io.c         |  1 +
 fs/btrfs/fs.h              | 12 +++++++
 fs/btrfs/ioctl.c           |  6 +++-
 fs/btrfs/scrub.c           | 72 +++++++++++++++++++++++++++++++++++++-
 fs/btrfs/scrub.h           |  2 ++
 include/uapi/linux/btrfs.h | 11 ++++--
 6 files changed, 100 insertions(+), 4 deletions(-)
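
(Not part of the patch) Below is a minimal user-space sketch of how the
new flag could be exercised through the existing BTRFS_IOC_SCRUB ioctl,
assuming an updated uapi header; the mount-point path and the fallback
#define are illustrative assumptions, and with only this patch applied
the call is expected to fail with EOPNOTSUPP since the main scrub work
is not yet implemented:

	/*
	 * Sketch: issue a logical scrub over the whole logical address
	 * space of a mounted btrfs filesystem.
	 */
	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	#ifndef BTRFS_SCRUB_LOGICAL
	#define BTRFS_SCRUB_LOGICAL	(1ULL << 1)	/* added by this patch */
	#endif

	int main(int argc, char **argv)
	{
		struct btrfs_ioctl_scrub_args sa;
		const char *mnt = (argc > 1) ? argv[1] : "/mnt/btrfs";
		int fd, ret;

		memset(&sa, 0, sizeof(sa));
		/* @devid is ignored for logical scrub; cover the full range. */
		sa.start = 0;
		sa.end = (__u64)-1;
		sa.flags = BTRFS_SCRUB_LOGICAL;

		fd = open(mnt, O_RDONLY);
		if (fd < 0) {
			perror("open");
			return 1;
		}

		/*
		 * -EINPROGRESS means a dev-scrub or dev-replace is already
		 * running and the logical scrub is rejected by the new
		 * exclusion checks.
		 */
		ret = ioctl(fd, BTRFS_IOC_SCRUB, &sa);
		if (ret < 0)
			perror("BTRFS_IOC_SCRUB");
		else
			printf("scrubbed %llu data bytes\n",
			       (unsigned long long)sa.progress.data_bytes_scrubbed);
		close(fd);
		return ret < 0;
	}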