diff mbox series

[13/38] xfs_scrub: tread zero-length read verify as an IO error

Message ID 172860654176.4183231.4710409081266557549.stgit@frogsfrogsfrogs (mailing list archive)
State Not Applicable, archived
Headers show
Series [01/38] libfrog: report metadata directories in the geometry report | expand

Commit Message

Darrick J. Wong Oct. 11, 2024, 1:17 a.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

While doing some chaos testing on the xfs_scrub read verify code, I
noticed that if the device under a live filesystem gets resized while
scrub is running a media scan, reads will start returning 0.  This
causes read_verify() to run around in an infinite loop instead of
erroring out like it should.

Cc: <linux-xfs@vger.kernel.org> # v5.3.0
Fixes: 27464242956fac ("xfs_scrub: fix read verify disk error handling strategy")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 scrub/phase6.c      |   22 ++++++++++++++++++++++
 scrub/read_verify.c |    8 ++++++++
 2 files changed, 30 insertions(+)
diff mbox series

Patch

diff --git a/scrub/phase6.c b/scrub/phase6.c
index a61853019e290c..54d21820a722a6 100644
--- a/scrub/phase6.c
+++ b/scrub/phase6.c
@@ -44,6 +44,9 @@  struct media_verify_state {
 	struct read_verify_pool	*rvp_realtime;
 	struct bitmap		*d_bad;		/* bytes */
 	struct bitmap		*r_bad;		/* bytes */
+	bool			d_trunc:1;
+	bool			r_trunc:1;
+	bool			l_trunc:1;
 };
 
 /* Find the fd for a given device identifier. */
@@ -544,6 +547,13 @@  report_all_media_errors(
 {
 	int				ret;
 
+	if (vs->d_trunc)
+		str_corrupt(ctx, ctx->mntpoint, _("data device truncated"));
+	if (vs->l_trunc)
+		str_corrupt(ctx, ctx->mntpoint, _("log device truncated"));
+	if (vs->r_trunc)
+		str_corrupt(ctx, ctx->mntpoint, _("rt device truncated"));
+
 	ret = report_disk_ioerrs(ctx, ctx->datadev, vs);
 	if (ret) {
 		str_liberror(ctx, ret, _("walking datadev io errors"));
@@ -663,6 +673,18 @@  remember_ioerr(
 	struct bitmap			*tree;
 	int				ret;
 
+	if (!length) {
+		dev_t			dev = disk_to_dev(ctx, disk);
+
+		if (dev == ctx->fsinfo.fs_datadev)
+			vs->d_trunc = true;
+		else if (dev == ctx->fsinfo.fs_rtdev)
+			vs->r_trunc = true;
+		else if (dev == ctx->fsinfo.fs_logdev)
+			vs->l_trunc = true;
+		return;
+	}
+
 	tree = bitmap_for_disk(ctx, disk, vs);
 	if (!tree) {
 		str_liberror(ctx, ENOENT, _("finding bad block bitmap"));
diff --git a/scrub/read_verify.c b/scrub/read_verify.c
index 52348274be2c25..1219efe2590182 100644
--- a/scrub/read_verify.c
+++ b/scrub/read_verify.c
@@ -245,6 +245,14 @@  read_verify(
 					read_error);
 			rvp->ioerr_fn(rvp->ctx, rvp->disk, rv->io_start, sz,
 					read_error, rv->io_end_arg);
+		} else if (sz == 0) {
+			/* No bytes at all?  Did we hit the end of the disk? */
+			dbg_printf("EOF %d @ %"PRIu64" %zu err %d\n",
+					rvp->disk->d_fd, rv->io_start, sz,
+					read_error);
+			rvp->ioerr_fn(rvp->ctx, rvp->disk, rv->io_start, sz,
+					read_error, rv->io_end_arg);
+			break;
 		} else if (sz < len) {
 			/*
 			 * A short direct read suggests that we might have hit