diff mbox series

btrfs-progs: scrub: Fix scrub cancel/resume not to skip most of the disk

Message ID 20190618080825.15336-1-g.btrfs@cobb.uk.net (mailing list archive)
State New, archived
Headers show
Series btrfs-progs: scrub: Fix scrub cancel/resume not to skip most of the disk | expand

Commit Message

Graham Cobb June 18, 2019, 8:08 a.m. UTC
When a scrub completes or is cancelled, statistics are updated for reporting
in a later btrfs scrub status command and for resuming the scrub. Most
statistics (such as bytes scrubbed) are additive so scrub adds the statistics
from the current run to the saved statistics.

However, the last_physical statistic is not additive. The value from the
current run should replace the saved value. The current code incorrectly
adds the last_physical from the current run to the previous saved value.

This bug causes the resume point to be incorrectly recorded, so large areas
of the disk are skipped when the scrub resumes. As an example, assume a disk
had 1000000 bytes and scrub was cancelled and resumed each time 10% (100000
bytes) had been scrubbed.

Run | Start byte | bytes scrubbed | kernel last_physical | saved last_physical
  1 |          0 |         100000 |               100000 |              100000
  2 |     100000 |         100000 |               200000 |              300000
  3 |     300000 |         100000 |               400000 |              700000
  4 |     700000 |         100000 |               800000 |             1500000
  5 |    1500000 |              0 | immediately completes| completed

In this example, only 40% of the disk is actually scrubbed.

This patch changes the saved/displayed last_physical to track the last
reported value from the kernel.

Signed-off-by: Graham R. Cobb <g.btrfs@cobb.uk.net>
---
 cmds-scrub.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

Comments

Graham Cobb June 25, 2019, 8:17 p.m. UTC | #1
On 18/06/2019 09:08, Graham R. Cobb wrote:
> When a scrub completes or is cancelled, statistics are updated for reporting
> in a later btrfs scrub status command and for resuming the scrub. Most
> statistics (such as bytes scrubbed) are additive so scrub adds the statistics
> from the current run to the saved statistics.
> 
> However, the last_physical statistic is not additive. The value from the
> current run should replace the saved value. The current code incorrectly
> adds the last_physical from the current run to the previous saved value.
> 
> This bug causes the resume point to be incorrectly recorded, so large areas
> of the disk are skipped when the scrub resumes. As an example, assume a disk
> had 1000000 bytes and scrub was cancelled and resumed each time 10% (100000
> bytes) had been scrubbed.
> 
> Run | Start byte | bytes scrubbed | kernel last_physical | saved last_physical
>   1 |          0 |         100000 |               100000 |              100000
>   2 |     100000 |         100000 |               200000 |              300000
>   3 |     300000 |         100000 |               400000 |              700000
>   4 |     700000 |         100000 |               800000 |             1500000
>   5 |    1500000 |              0 | immediately completes| completed
> 
> In this example, only 40% of the disk is actually scrubbed.
> 
> This patch changes the saved/displayed last_physical to track the last
> reported value from the kernel.
> 
> Signed-off-by: Graham R. Cobb <g.btrfs@cobb.uk.net>

Ping? This fix is important for anyone who interrupts and resumes scrubs
-- which will happen more and more as filesystems get bigger. It is a
small fix and would be good to get out to distros.

Graham

> ---
>  cmds-scrub.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/cmds-scrub.c b/cmds-scrub.c
> index f21d2d89..2800e796 100644
> --- a/cmds-scrub.c
> +++ b/cmds-scrub.c
> @@ -171,6 +171,10 @@ static void print_scrub_summary(struct btrfs_scrub_progress *p)
>  	fs_stat->p.name += p->name;		\
>  } while (0)
>  
> +#define _SCRUB_FS_STAT_COPY(p, name, fs_stat) do {	\
> +	fs_stat->p.name = p->name;		\
> +} while (0)
> +
>  #define _SCRUB_FS_STAT_MIN(ss, name, fs_stat)	\
>  do {						\
>  	if (fs_stat->s.name > ss->name) {	\
> @@ -209,7 +213,7 @@ static void add_to_fs_stat(struct btrfs_scrub_progress *p,
>  	_SCRUB_FS_STAT(p, malloc_errors, fs_stat);
>  	_SCRUB_FS_STAT(p, uncorrectable_errors, fs_stat);
>  	_SCRUB_FS_STAT(p, corrected_errors, fs_stat);
> -	_SCRUB_FS_STAT(p, last_physical, fs_stat);
> +	_SCRUB_FS_STAT_COPY(p, last_physical, fs_stat);
>  	_SCRUB_FS_STAT_ZMIN(ss, t_start, fs_stat);
>  	_SCRUB_FS_STAT_ZMIN(ss, t_resumed, fs_stat);
>  	_SCRUB_FS_STAT_ZMAX(ss, duration, fs_stat);
> @@ -683,6 +687,8 @@ static int scrub_writev(int fd, char *buf, int max, const char *fmt, ...)
>  
>  #define _SCRUB_SUM(dest, data, name) dest->scrub_args.progress.name =	\
>  			data->resumed->p.name + data->scrub_args.progress.name
> +#define _SCRUB_COPY(dest, data, name) dest->scrub_args.progress.name =	\
> +			data->scrub_args.progress.name
>  
>  static struct scrub_progress *scrub_resumed_stats(struct scrub_progress *data,
>  						  struct scrub_progress *dest)
> @@ -703,7 +709,7 @@ static struct scrub_progress *scrub_resumed_stats(struct scrub_progress *data,
>  	_SCRUB_SUM(dest, data, malloc_errors);
>  	_SCRUB_SUM(dest, data, uncorrectable_errors);
>  	_SCRUB_SUM(dest, data, corrected_errors);
> -	_SCRUB_SUM(dest, data, last_physical);
> +	_SCRUB_COPY(dest, data, last_physical);
>  	dest->stats.canceled = data->stats.canceled;
>  	dest->stats.finished = data->stats.finished;
>  	dest->stats.t_resumed = data->stats.t_start;
>
David Sterba July 1, 2019, 5:53 p.m. UTC | #2
On Tue, Jun 25, 2019 at 09:17:43PM +0100, Graham Cobb wrote:
> On 18/06/2019 09:08, Graham R. Cobb wrote:
> > When a scrub completes or is cancelled, statistics are updated for reporting
> > in a later btrfs scrub status command and for resuming the scrub. Most
> > statistics (such as bytes scrubbed) are additive so scrub adds the statistics
> > from the current run to the saved statistics.
> > 
> > However, the last_physical statistic is not additive. The value from the
> > current run should replace the saved value. The current code incorrectly
> > adds the last_physical from the current run to the previous saved value.
> > 
> > This bug causes the resume point to be incorrectly recorded, so large areas
> > of the disk are skipped when the scrub resumes. As an example, assume a disk
> > had 1000000 bytes and scrub was cancelled and resumed each time 10% (100000
> > bytes) had been scrubbed.
> > 
> > Run | Start byte | bytes scrubbed | kernel last_physical | saved last_physical
> >   1 |          0 |         100000 |               100000 |              100000
> >   2 |     100000 |         100000 |               200000 |              300000
> >   3 |     300000 |         100000 |               400000 |              700000
> >   4 |     700000 |         100000 |               800000 |             1500000
> >   5 |    1500000 |              0 | immediately completes| completed
> > 
> > In this example, only 40% of the disk is actually scrubbed.
> > 
> > This patch changes the saved/displayed last_physical to track the last
> > reported value from the kernel.
> > 
> > Signed-off-by: Graham R. Cobb <g.btrfs@cobb.uk.net>
> 
> Ping? This fix is important for anyone who interrupts and resumes scrubs
> -- which will happen more and more as filesystems get bigger. It is a
> small fix and would be good to get out to distros.

Sorry for the delay. The analysis and fix are correct, patch added to
devel. Thanks.
diff mbox series

Patch

diff --git a/cmds-scrub.c b/cmds-scrub.c
index f21d2d89..2800e796 100644
--- a/cmds-scrub.c
+++ b/cmds-scrub.c
@@ -171,6 +171,10 @@  static void print_scrub_summary(struct btrfs_scrub_progress *p)
 	fs_stat->p.name += p->name;		\
 } while (0)
 
+#define _SCRUB_FS_STAT_COPY(p, name, fs_stat) do {	\
+	fs_stat->p.name = p->name;		\
+} while (0)
+
 #define _SCRUB_FS_STAT_MIN(ss, name, fs_stat)	\
 do {						\
 	if (fs_stat->s.name > ss->name) {	\
@@ -209,7 +213,7 @@  static void add_to_fs_stat(struct btrfs_scrub_progress *p,
 	_SCRUB_FS_STAT(p, malloc_errors, fs_stat);
 	_SCRUB_FS_STAT(p, uncorrectable_errors, fs_stat);
 	_SCRUB_FS_STAT(p, corrected_errors, fs_stat);
-	_SCRUB_FS_STAT(p, last_physical, fs_stat);
+	_SCRUB_FS_STAT_COPY(p, last_physical, fs_stat);
 	_SCRUB_FS_STAT_ZMIN(ss, t_start, fs_stat);
 	_SCRUB_FS_STAT_ZMIN(ss, t_resumed, fs_stat);
 	_SCRUB_FS_STAT_ZMAX(ss, duration, fs_stat);
@@ -683,6 +687,8 @@  static int scrub_writev(int fd, char *buf, int max, const char *fmt, ...)
 
 #define _SCRUB_SUM(dest, data, name) dest->scrub_args.progress.name =	\
 			data->resumed->p.name + data->scrub_args.progress.name
+#define _SCRUB_COPY(dest, data, name) dest->scrub_args.progress.name =	\
+			data->scrub_args.progress.name
 
 static struct scrub_progress *scrub_resumed_stats(struct scrub_progress *data,
 						  struct scrub_progress *dest)
@@ -703,7 +709,7 @@  static struct scrub_progress *scrub_resumed_stats(struct scrub_progress *data,
 	_SCRUB_SUM(dest, data, malloc_errors);
 	_SCRUB_SUM(dest, data, uncorrectable_errors);
 	_SCRUB_SUM(dest, data, corrected_errors);
-	_SCRUB_SUM(dest, data, last_physical);
+	_SCRUB_COPY(dest, data, last_physical);
 	dest->stats.canceled = data->stats.canceled;
 	dest->stats.finished = data->stats.finished;
 	dest->stats.t_resumed = data->stats.t_start;