[2/3] xfs_scrub: perform media scans of entire devices
diff mbox series

Message ID 157177021069.1460684.13385243350591362467.stgit@magnolia
State Rejected
Headers show
Series
  • xfs_scrub: media scan entire disks
Related show

Commit Message

Darrick J. Wong Oct. 22, 2019, 6:50 p.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

Add a new feature to xfs_scrub where specifying multiple -x will cause
it to perform a media scan of the entire disk, not just the file data
areas.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Allison Collins <allison.henderson@oracle.com>
---
 man/man8/xfs_scrub.8 |    3 +++
 scrub/phase6.c       |   60 ++++++++++++++++++++++++++++++++++++++++++++++----
 scrub/phase7.c       |    5 ++++
 scrub/xfs_scrub.c    |    4 ++-
 scrub/xfs_scrub.h    |    1 +
 5 files changed, 66 insertions(+), 7 deletions(-)

Comments

Eric Sandeen Nov. 6, 2019, 8:15 p.m. UTC | #1
On 10/22/19 1:50 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Add a new feature to xfs_scrub where specifying multiple -x will cause
> it to perform a media scan of the entire disk, not just the file data
> areas.

Hm.... convince me why this is useful?  It's likely that any such block
will get remapped when it's used anyway, right?  What does this help?

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> Reviewed-by: Allison Collins <allison.henderson@oracle.com>
> ---
>  man/man8/xfs_scrub.8 |    3 +++
>  scrub/phase6.c       |   60 ++++++++++++++++++++++++++++++++++++++++++++++----
>  scrub/phase7.c       |    5 ++++
>  scrub/xfs_scrub.c    |    4 ++-
>  scrub/xfs_scrub.h    |    1 +
>  5 files changed, 66 insertions(+), 7 deletions(-)
> 
> 
> diff --git a/man/man8/xfs_scrub.8 b/man/man8/xfs_scrub.8
> index e881ae76..2cdec380 100644
> --- a/man/man8/xfs_scrub.8
> +++ b/man/man8/xfs_scrub.8
> @@ -97,6 +97,9 @@ Prints the version number and exits.
>  .TP
>  .B \-x
>  Read all file data extents to look for disk errors.
> +If this option is given more than once, scrub all disk contents.

This addition doesn't really make clear what -xx does, IMHO.  Something
more explicit like "look for errors even in freespace?"

> +If this option is given more than twice, report errors even if they have not
> +yet caused data loss.

sooo w/ -xx it'll read freespace but not report errors, with -xxx it will?


-Eric
Darrick J. Wong Nov. 6, 2019, 8:22 p.m. UTC | #2
On Wed, Nov 06, 2019 at 02:15:43PM -0600, Eric Sandeen wrote:
> On 10/22/19 1:50 PM, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Add a new feature to xfs_scrub where specifying multiple -x will cause
> > it to perform a media scan of the entire disk, not just the file data
> > areas.
> 
> Hm.... convince me why this is useful?  It's likely that any such block
> will get remapped when it's used anyway, right?  What does this help?

It's really only useful as a general media scan, which anyone can do
with ddrescue.  It could potentially be useful if storage reverts to the
old days where writes don't actually remap (flooppies!)....

...eh, let's drop this and the next patch.  I don't even think pmem
behaves like that.

> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > Reviewed-by: Allison Collins <allison.henderson@oracle.com>
> > ---
> >  man/man8/xfs_scrub.8 |    3 +++
> >  scrub/phase6.c       |   60 ++++++++++++++++++++++++++++++++++++++++++++++----
> >  scrub/phase7.c       |    5 ++++
> >  scrub/xfs_scrub.c    |    4 ++-
> >  scrub/xfs_scrub.h    |    1 +
> >  5 files changed, 66 insertions(+), 7 deletions(-)
> > 
> > 
> > diff --git a/man/man8/xfs_scrub.8 b/man/man8/xfs_scrub.8
> > index e881ae76..2cdec380 100644
> > --- a/man/man8/xfs_scrub.8
> > +++ b/man/man8/xfs_scrub.8
> > @@ -97,6 +97,9 @@ Prints the version number and exits.
> >  .TP
> >  .B \-x
> >  Read all file data extents to look for disk errors.
> > +If this option is given more than once, scrub all disk contents.
> 
> This addition doesn't really make clear what -xx does, IMHO.  Something
> more explicit like "look for errors even in freespace?"

Yeah.

> > +If this option is given more than twice, report errors even if they have not
> > +yet caused data loss.
> 
> sooo w/ -xx it'll read freespace but not report errors, with -xxx it will?

The same as -xx.

--D

> 
> -Eric

Patch
diff mbox series

diff --git a/man/man8/xfs_scrub.8 b/man/man8/xfs_scrub.8
index e881ae76..2cdec380 100644
--- a/man/man8/xfs_scrub.8
+++ b/man/man8/xfs_scrub.8
@@ -97,6 +97,9 @@  Prints the version number and exits.
 .TP
 .B \-x
 Read all file data extents to look for disk errors.
+If this option is given more than once, scrub all disk contents.
+If this option is given more than twice, report errors even if they have not
+yet caused data loss.
 .B xfs_scrub
 will issue O_DIRECT reads to the block device directly.
 If the block device is a SCSI disk, it will instead issue READ VERIFY commands
diff --git a/scrub/phase6.c b/scrub/phase6.c
index aae6b7d8..9ddddef1 100644
--- a/scrub/phase6.c
+++ b/scrub/phase6.c
@@ -188,7 +188,9 @@  report_data_loss(
 	int				ret;
 
 	/* Only report errors for real extents. */
-	if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC))
+	if (scrub_data < 3 && (bmap->bm_flags & BMV_OF_PREALLOC))
+		return true;
+	if (bmap->bm_flags & BMV_OF_DELALLOC)
 		return true;
 
 	if (fsx->fsx_xflags & FS_XFLAG_REALTIME)
@@ -376,7 +378,7 @@  report_ioerr_fsmap(
 	uint64_t		err_off;
 
 	/* Don't care about unwritten extents. */
-	if (map->fmr_flags & FMR_OF_PREALLOC)
+	if (scrub_data < 3 && (map->fmr_flags & FMR_OF_PREALLOC))
 		return true;
 
 	if (err_physical > map->fmr_physical)
@@ -603,6 +605,49 @@  remember_ioerr(
 		str_liberror(ctx, ret, _("setting bad block bitmap"));
 }
 
+/* Schedule an entire disk for read verification. */
+static int
+verify_entire_disk(
+	struct read_verify_pool		*rvp,
+	struct disk			*disk,
+	struct media_verify_state	*vs)
+{
+	return read_verify_schedule_io(rvp, 0, disk->d_size, vs);
+}
+
+/* Scan every part of every disk. */
+static bool
+verify_all_disks(
+	struct scrub_ctx		*ctx,
+	struct media_verify_state	*vs)
+{
+	int				ret;
+
+	ret = verify_entire_disk(vs->rvp_data, ctx->datadev, vs);
+	if (ret) {
+		str_liberror(ctx, ret, _("scheduling datadev verify"));
+		return false;
+	}
+
+	if (ctx->logdev) {
+		ret = verify_entire_disk(vs->rvp_log, ctx->logdev, vs);
+		if (ret) {
+			str_liberror(ctx, ret, _("scheduling logdev verify"));
+			return false;
+		}
+	}
+
+	if (ctx->rtdev) {
+		ret = verify_entire_disk(vs->rvp_realtime, ctx->rtdev, vs);
+		if (ret) {
+			str_liberror(ctx, ret, _("scheduling rtdev verify"));
+			return false;
+		}
+	}
+
+	return true;
+}
+
 /*
  * Read verify all the file data blocks in a filesystem.  Since XFS doesn't
  * do data checksums, we trust that the underlying storage will pass back
@@ -658,7 +703,11 @@  xfs_scan_blocks(
 			goto out_logpool;
 		}
 	}
-	moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &vs);
+
+	if (scrub_data > 1)
+		moveon = verify_all_disks(ctx, &vs);
+	else
+		moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &vs);
 	if (!moveon)
 		goto out_rtpool;
 
@@ -730,8 +779,9 @@  xfs_estimate_verify_work(
 	if (!moveon)
 		return moveon;
 
-	*items = cvt_off_fsb_to_b(&ctx->mnt,
-			(d_blocks - d_bfree) + (r_blocks - r_bfree));
+	*items = cvt_off_fsb_to_b(&ctx->mnt, d_blocks + r_blocks);
+	if (scrub_data == 1)
+		*items -= cvt_off_fsb_to_b(&ctx->mnt, d_bfree + r_bfree);
 	*nr_threads = disk_heads(ctx->datadev);
 	*rshift = 20;
 	return moveon;
diff --git a/scrub/phase7.c b/scrub/phase7.c
index bc959f5b..570ceb3f 100644
--- a/scrub/phase7.c
+++ b/scrub/phase7.c
@@ -255,6 +255,11 @@  _("%.*f%s inodes counted; %.*f%s inodes checked.\n"),
 		double		b1, b2;
 		char		*b1u, *b2u;
 
+		if (scrub_data > 1) {
+			used_data = cvt_off_fsb_to_b(&ctx->mnt, d_blocks);
+			used_rt = cvt_off_fsb_to_b(&ctx->mnt, r_blocks);
+		}
+
 		b1 = auto_space_units(used_data + used_rt, &b1u);
 		b2 = auto_space_units(ctx->bytes_checked, &b2u);
 		fprintf(stdout,
diff --git a/scrub/xfs_scrub.c b/scrub/xfs_scrub.c
index 9945c7f4..b2e58108 100644
--- a/scrub/xfs_scrub.c
+++ b/scrub/xfs_scrub.c
@@ -139,7 +139,7 @@  unsigned int			force_nr_threads;
 bool				verbose;
 
 /* Should we scrub the data blocks? */
-static bool			scrub_data;
+int				scrub_data;
 
 /* Size of a memory page. */
 long				page_size;
@@ -668,7 +668,7 @@  main(
 			fflush(stdout);
 			return SCRUB_RET_SUCCESS;
 		case 'x':
-			scrub_data = true;
+			scrub_data++;
 			break;
 		case '?':
 			/* fall through */
diff --git a/scrub/xfs_scrub.h b/scrub/xfs_scrub.h
index 61831c92..ed0e63b8 100644
--- a/scrub/xfs_scrub.h
+++ b/scrub/xfs_scrub.h
@@ -21,6 +21,7 @@  extern bool			want_fstrim;
 extern bool			stderr_isatty;
 extern bool			stdout_isatty;
 extern bool			is_service;
+extern int			scrub_data;
 
 enum scrub_mode {
 	SCRUB_MODE_DRY_RUN,