Message ID | 157177021069.1460684.13385243350591362467.stgit@magnolia (mailing list archive) |
---|---|
State | Rejected |
Headers | show |
Series | xfs_scrub: media scan entire disks | expand |
On 10/22/19 1:50 PM, Darrick J. Wong wrote: > From: Darrick J. Wong <darrick.wong@oracle.com> > > Add a new feature to xfs_scrub where specifying multiple -x will cause > it to perform a media scan of the entire disk, not just the file data > areas. Hm.... convince me why this is useful? It's likely that any such block will get remapped when it's used anyway, right? What does this help? > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > Reviewed-by: Allison Collins <allison.henderson@oracle.com> > --- > man/man8/xfs_scrub.8 | 3 +++ > scrub/phase6.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++---- > scrub/phase7.c | 5 ++++ > scrub/xfs_scrub.c | 4 ++- > scrub/xfs_scrub.h | 1 + > 5 files changed, 66 insertions(+), 7 deletions(-) > > > diff --git a/man/man8/xfs_scrub.8 b/man/man8/xfs_scrub.8 > index e881ae76..2cdec380 100644 > --- a/man/man8/xfs_scrub.8 > +++ b/man/man8/xfs_scrub.8 > @@ -97,6 +97,9 @@ Prints the version number and exits. > .TP > .B \-x > Read all file data extents to look for disk errors. > +If this option is given more than once, scrub all disk contents. This addition doesn't really make clear what -xx does, IMHO. Something more explicit like "look for errors even in freespace?" > +If this option is given more than twice, report errors even if they have not > +yet caused data loss. sooo w/ -xx it'll read freespace but not report errors, with -xxx it will? -Eric
On Wed, Nov 06, 2019 at 02:15:43PM -0600, Eric Sandeen wrote: > On 10/22/19 1:50 PM, Darrick J. Wong wrote: > > From: Darrick J. Wong <darrick.wong@oracle.com> > > > > Add a new feature to xfs_scrub where specifying multiple -x will cause > > it to perform a media scan of the entire disk, not just the file data > > areas. > > Hm.... convince me why this is useful? It's likely that any such block > will get remapped when it's used anyway, right? What does this help? It's really only useful as a general media scan, which anyone can do with ddrescue. It could potentially be useful if storage reverts to the old days where writes don't actually remap (flooppies!).... ...eh, let's drop this and the next patch. I don't even think pmem behaves like that. > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > > Reviewed-by: Allison Collins <allison.henderson@oracle.com> > > --- > > man/man8/xfs_scrub.8 | 3 +++ > > scrub/phase6.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++---- > > scrub/phase7.c | 5 ++++ > > scrub/xfs_scrub.c | 4 ++- > > scrub/xfs_scrub.h | 1 + > > 5 files changed, 66 insertions(+), 7 deletions(-) > > > > > > diff --git a/man/man8/xfs_scrub.8 b/man/man8/xfs_scrub.8 > > index e881ae76..2cdec380 100644 > > --- a/man/man8/xfs_scrub.8 > > +++ b/man/man8/xfs_scrub.8 > > @@ -97,6 +97,9 @@ Prints the version number and exits. > > .TP > > .B \-x > > Read all file data extents to look for disk errors. > > +If this option is given more than once, scrub all disk contents. > > This addition doesn't really make clear what -xx does, IMHO. Something > more explicit like "look for errors even in freespace?" Yeah. > > +If this option is given more than twice, report errors even if they have not > > +yet caused data loss. > > sooo w/ -xx it'll read freespace but not report errors, with -xxx it will? The same as -xx. --D > > -Eric
diff --git a/man/man8/xfs_scrub.8 b/man/man8/xfs_scrub.8 index e881ae76..2cdec380 100644 --- a/man/man8/xfs_scrub.8 +++ b/man/man8/xfs_scrub.8 @@ -97,6 +97,9 @@ Prints the version number and exits. .TP .B \-x Read all file data extents to look for disk errors. +If this option is given more than once, scrub all disk contents. +If this option is given more than twice, report errors even if they have not +yet caused data loss. .B xfs_scrub will issue O_DIRECT reads to the block device directly. If the block device is a SCSI disk, it will instead issue READ VERIFY commands diff --git a/scrub/phase6.c b/scrub/phase6.c index aae6b7d8..9ddddef1 100644 --- a/scrub/phase6.c +++ b/scrub/phase6.c @@ -188,7 +188,9 @@ report_data_loss( int ret; /* Only report errors for real extents. */ - if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC)) + if (scrub_data < 3 && (bmap->bm_flags & BMV_OF_PREALLOC)) + return true; + if (bmap->bm_flags & BMV_OF_DELALLOC) return true; if (fsx->fsx_xflags & FS_XFLAG_REALTIME) @@ -376,7 +378,7 @@ report_ioerr_fsmap( uint64_t err_off; /* Don't care about unwritten extents. */ - if (map->fmr_flags & FMR_OF_PREALLOC) + if (scrub_data < 3 && (map->fmr_flags & FMR_OF_PREALLOC)) return true; if (err_physical > map->fmr_physical) @@ -603,6 +605,49 @@ remember_ioerr( str_liberror(ctx, ret, _("setting bad block bitmap")); } +/* Schedule an entire disk for read verification. */ +static int +verify_entire_disk( + struct read_verify_pool *rvp, + struct disk *disk, + struct media_verify_state *vs) +{ + return read_verify_schedule_io(rvp, 0, disk->d_size, vs); +} + +/* Scan every part of every disk. */ +static bool +verify_all_disks( + struct scrub_ctx *ctx, + struct media_verify_state *vs) +{ + int ret; + + ret = verify_entire_disk(vs->rvp_data, ctx->datadev, vs); + if (ret) { + str_liberror(ctx, ret, _("scheduling datadev verify")); + return false; + } + + if (ctx->logdev) { + ret = verify_entire_disk(vs->rvp_log, ctx->logdev, vs); + if (ret) { + str_liberror(ctx, ret, _("scheduling logdev verify")); + return false; + } + } + + if (ctx->rtdev) { + ret = verify_entire_disk(vs->rvp_realtime, ctx->rtdev, vs); + if (ret) { + str_liberror(ctx, ret, _("scheduling rtdev verify")); + return false; + } + } + + return true; +} + /* * Read verify all the file data blocks in a filesystem. Since XFS doesn't * do data checksums, we trust that the underlying storage will pass back @@ -658,7 +703,11 @@ xfs_scan_blocks( goto out_logpool; } } - moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &vs); + + if (scrub_data > 1) + moveon = verify_all_disks(ctx, &vs); + else + moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &vs); if (!moveon) goto out_rtpool; @@ -730,8 +779,9 @@ xfs_estimate_verify_work( if (!moveon) return moveon; - *items = cvt_off_fsb_to_b(&ctx->mnt, - (d_blocks - d_bfree) + (r_blocks - r_bfree)); + *items = cvt_off_fsb_to_b(&ctx->mnt, d_blocks + r_blocks); + if (scrub_data == 1) + *items -= cvt_off_fsb_to_b(&ctx->mnt, d_bfree + r_bfree); *nr_threads = disk_heads(ctx->datadev); *rshift = 20; return moveon; diff --git a/scrub/phase7.c b/scrub/phase7.c index bc959f5b..570ceb3f 100644 --- a/scrub/phase7.c +++ b/scrub/phase7.c @@ -255,6 +255,11 @@ _("%.*f%s inodes counted; %.*f%s inodes checked.\n"), double b1, b2; char *b1u, *b2u; + if (scrub_data > 1) { + used_data = cvt_off_fsb_to_b(&ctx->mnt, d_blocks); + used_rt = cvt_off_fsb_to_b(&ctx->mnt, r_blocks); + } + b1 = auto_space_units(used_data + used_rt, &b1u); b2 = auto_space_units(ctx->bytes_checked, &b2u); fprintf(stdout, diff --git a/scrub/xfs_scrub.c b/scrub/xfs_scrub.c index 9945c7f4..b2e58108 100644 --- a/scrub/xfs_scrub.c +++ b/scrub/xfs_scrub.c @@ -139,7 +139,7 @@ unsigned int force_nr_threads; bool verbose; /* Should we scrub the data blocks? */ -static bool scrub_data; +int scrub_data; /* Size of a memory page. */ long page_size; @@ -668,7 +668,7 @@ main( fflush(stdout); return SCRUB_RET_SUCCESS; case 'x': - scrub_data = true; + scrub_data++; break; case '?': /* fall through */ diff --git a/scrub/xfs_scrub.h b/scrub/xfs_scrub.h index 61831c92..ed0e63b8 100644 --- a/scrub/xfs_scrub.h +++ b/scrub/xfs_scrub.h @@ -21,6 +21,7 @@ extern bool want_fstrim; extern bool stderr_isatty; extern bool stdout_isatty; extern bool is_service; +extern int scrub_data; enum scrub_mode { SCRUB_MODE_DRY_RUN,