diff mbox series

[29/45] xfs_repair: validate rt groups vs reported hardware zones

Message ID 20250409075557.3535745-30-hch@lst.de (mailing list archive)
State New
Headers show
Series [01/45] xfs: generalize the freespace and reserved blocks handling | expand

Commit Message

Christoph Hellwig April 9, 2025, 7:55 a.m. UTC
Run a report zones ioctl, and verify the rt group state vs the
reported hardware zone state.  Note that there is no way to actually
fix up any discrepancies here, as that would be rather scary without
having transactions.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 repair/Makefile |   1 +
 repair/phase5.c |  11 +---
 repair/zoned.c  | 136 ++++++++++++++++++++++++++++++++++++++++++++++++
 repair/zoned.h  |  10 ++++
 4 files changed, 149 insertions(+), 9 deletions(-)
 create mode 100644 repair/zoned.c
 create mode 100644 repair/zoned.h

Comments

Darrick J. Wong April 9, 2025, 6:41 p.m. UTC | #1
On Wed, Apr 09, 2025 at 09:55:32AM +0200, Christoph Hellwig wrote:
> Run a report zones ioctl, and verify the rt group state vs the
> reported hardware zone state.  Note that there is no way to actually
> fix up any discrepancies here, as that would be rather scary without
> having transactions.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  repair/Makefile |   1 +
>  repair/phase5.c |  11 +---
>  repair/zoned.c  | 136 ++++++++++++++++++++++++++++++++++++++++++++++++
>  repair/zoned.h  |  10 ++++
>  4 files changed, 149 insertions(+), 9 deletions(-)
>  create mode 100644 repair/zoned.c
>  create mode 100644 repair/zoned.h
> 
> diff --git a/repair/Makefile b/repair/Makefile
> index ff5b1f5abeda..fb0b2f96cc91 100644
> --- a/repair/Makefile
> +++ b/repair/Makefile
> @@ -81,6 +81,7 @@ CFILES = \
>  	strblobs.c \
>  	threads.c \
>  	versions.c \
> +	zoned.c \
>  	xfs_repair.c
>  
>  LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBXCMD) $(LIBFROG) $(LIBUUID) $(LIBRT) \
> diff --git a/repair/phase5.c b/repair/phase5.c
> index e350b411c243..e44c26885717 100644
> --- a/repair/phase5.c
> +++ b/repair/phase5.c
> @@ -21,6 +21,7 @@
>  #include "rmap.h"
>  #include "bulkload.h"
>  #include "agbtree.h"
> +#include "zoned.h"
>  
>  static uint64_t	*sb_icount_ag;		/* allocated inodes per ag */
>  static uint64_t	*sb_ifree_ag;		/* free inodes per ag */
> @@ -631,15 +632,7 @@ check_rtmetadata(
>  	struct xfs_mount	*mp)
>  {
>  	if (xfs_has_zoned(mp)) {
> -		/*
> -		 * Here we could/should verify the zone state a bit when we are
> -		 * on actual zoned devices:
> -		 *	- compare hw write pointer to last written
> -		 *	- compare zone state to last written
> -		 *
> -		 * Note much we can do when running in zoned mode on a
> -		 * conventional device.
> -		 */
> +		check_zones(mp);
>  		return;
>  	}
>  
> diff --git a/repair/zoned.c b/repair/zoned.c
> new file mode 100644
> index 000000000000..06b2a08dff39
> --- /dev/null
> +++ b/repair/zoned.c
> @@ -0,0 +1,136 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2024 Christoph Hellwig.
> + */
> +#include <ctype.h>
> +#include <linux/blkzoned.h>
> +#include "libxfs_priv.h"
> +#include "libxfs.h"
> +#include "xfs_zones.h"
> +#include "err_protos.h"
> +#include "zoned.h"
> +
> +/* random size that allows efficient processing */
> +#define ZONES_PER_IOCTL			16384
> +
> +static void
> +report_zones_cb(
> +	struct xfs_mount	*mp,
> +	struct blk_zone		*zone)
> +{
> +	xfs_fsblock_t		zsbno = xfs_daddr_to_rtb(mp, zone->start);

        ^^^^^^^^^^^^^ nit: xfs_rtblock_t ?

> +	xfs_rgblock_t		write_pointer;
> +	xfs_rgnumber_t		rgno;
> +	struct xfs_rtgroup	*rtg;
> +
> +	if (xfs_rtb_to_rgbno(mp, zsbno) != 0) {
> +		do_error(_("mismatched zone start 0x%llx."),
> +				(unsigned long long)zsbno);
> +		return;
> +	}
> +
> +	rgno = xfs_rtb_to_rgno(mp, zsbno);
> +	rtg = xfs_rtgroup_grab(mp, rgno);
> +	if (!rtg) {
> +		do_error(_("realtime group not found for zone %u."), rgno);
> +		return;
> +	}
> +
> +	if (!rtg_rmap(rtg))
> +		do_warn(_("no rmap inode for zone %u."), rgno);
> +	else
> +		xfs_zone_validate(zone, rtg, &write_pointer);
> +	xfs_rtgroup_rele(rtg);
> +}
> +
> +void check_zones(struct xfs_mount *mp)
> +{
> +	int fd = mp->m_rtdev_targp->bt_bdev_fd;
> +	uint64_t sector = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart);
> +	unsigned int zone_size, zone_capacity;
> +	struct blk_zone_report *rep;
> +	unsigned int i, n = 0;
> +	uint64_t device_size;
> +	size_t rep_size;

Nit: inconsistent styles in declaration indentation
> +
> +	if (ioctl(fd, BLKGETSIZE64, &device_size))
> +		return; /* not a block device */
> +	if (ioctl(fd, BLKGETZONESZ, &zone_size) || !zone_size)
> +		return;	/* not zoned */
> +
> +	device_size /= 512; /* BLKGETSIZE64 reports a byte value */

device_size = BTOBB(device_size); ?

> +	if (device_size / zone_size < mp->m_sb.sb_rgcount) {
> +		do_error(_("rt device too small\n"));
> +		return;
> +	}
> +
> +	rep_size = sizeof(struct blk_zone_report) +
> +		   sizeof(struct blk_zone) * ZONES_PER_IOCTL;
> +	rep = malloc(rep_size);
> +	if (!rep) {
> +		do_warn(_("malloc failed for zone report\n"));
> +		return;
> +	}
> +
> +	while (n < mp->m_sb.sb_rgcount) {
> +		struct blk_zone *zones = (struct blk_zone *)(rep + 1);
> +		int ret;
> +
> +		memset(rep, 0, rep_size);
> +		rep->sector = sector;
> +		rep->nr_zones = ZONES_PER_IOCTL;
> +
> +		ret = ioctl(fd, BLKREPORTZONE, rep);
> +		if (ret) {
> +			do_error(_("ioctl(BLKREPORTZONE) failed: %d!\n"), ret);
> +			goto out_free;
> +		}
> +		if (!rep->nr_zones)
> +			break;
> +
> +		for (i = 0; i < rep->nr_zones; i++) {
> +			if (n >= mp->m_sb.sb_rgcount)
> +				break;
> +
> +			if (zones[i].len != zone_size) {
> +				do_error(_("Inconsistent zone size!\n"));
> +				goto out_free;
> +			}
> +
> +			switch (zones[i].type) {
> +			case BLK_ZONE_TYPE_CONVENTIONAL:
> +			case BLK_ZONE_TYPE_SEQWRITE_REQ:
> +				break;
> +			case BLK_ZONE_TYPE_SEQWRITE_PREF:
> +				do_error(
> +_("Found sequential write preferred zone\n"));

I wonder, can "sequential preferred" zones be treated as if they are
conventional zones?  Albeit really slow ones?

/me goes to rummage to see if he still has one of these DMSMR disks.

--D

> +				goto out_free;
> +			default:
> +				do_error(
> +_("Found unknown zone type (0x%x)\n"), zones[i].type);
> +				goto out_free;
> +			}
> +
> +			if (!n) {
> +				zone_capacity = zones[i].capacity;
> +				if (zone_capacity > zone_size) {
> +					do_error(
> +_("Zone capacity larger than zone size!\n"));
> +					goto out_free;
> +				}
> +			} else if (zones[i].capacity != zone_capacity) {
> +				do_error(
> +_("Inconsistent zone capacity!\n"));
> +				goto out_free;
> +			}
> +
> +			report_zones_cb(mp, &zones[i]);
> +			n++;
> +		}
> +		sector = zones[rep->nr_zones - 1].start +
> +			 zones[rep->nr_zones - 1].len;
> +	}
> +
> +out_free:
> +	free(rep);
> +}
> diff --git a/repair/zoned.h b/repair/zoned.h
> new file mode 100644
> index 000000000000..ab76bf15b3ca
> --- /dev/null
> +++ b/repair/zoned.h
> @@ -0,0 +1,10 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (c) 2024 Christoph Hellwig.
> + */
> +#ifndef _XFS_REPAIR_ZONED_H_
> +#define _XFS_REPAIR_ZONED_H_
> +
> +void check_zones(struct xfs_mount *mp);
> +
> +#endif /* _XFS_REPAIR_ZONED_H_ */
> -- 
> 2.47.2
> 
>
Christoph Hellwig April 10, 2025, 6:34 a.m. UTC | #2
On Wed, Apr 09, 2025 at 11:41:12AM -0700, Darrick J. Wong wrote:
> > +#define ZONES_PER_IOCTL			16384
> > +
> > +static void
> > +report_zones_cb(
> > +	struct xfs_mount	*mp,
> > +	struct blk_zone		*zone)
> > +{
> > +	xfs_fsblock_t		zsbno = xfs_daddr_to_rtb(mp, zone->start);
> 
>         ^^^^^^^^^^^^^ nit: xfs_rtblock_t ?

Updated.

> Nit: inconsistent styles in declaration indentation

Fixed.

> > +	device_size /= 512; /* BLKGETSIZE64 reports a byte value */
> 
> device_size = BTOBB(device_size); ?

Sure.

> > +
> > +			switch (zones[i].type) {
> > +			case BLK_ZONE_TYPE_CONVENTIONAL:
> > +			case BLK_ZONE_TYPE_SEQWRITE_REQ:
> > +				break;
> > +			case BLK_ZONE_TYPE_SEQWRITE_PREF:
> > +				do_error(
> > +_("Found sequential write preferred zone\n"));
> 
> I wonder, can "sequential preferred" zones be treated as if they are
> conventional zones?  Albeit really slow ones?

Yes, they could.  However in the kernel we've decided that dealing
them is too painful for the few prototypes build that way and reject
them in the block layer.  So we won't ever seem them here except with
a rather old kernel.
Darrick J. Wong April 10, 2025, 4:43 p.m. UTC | #3
On Thu, Apr 10, 2025 at 08:34:57AM +0200, Christoph Hellwig wrote:
> On Wed, Apr 09, 2025 at 11:41:12AM -0700, Darrick J. Wong wrote:
> > > +#define ZONES_PER_IOCTL			16384
> > > +
> > > +static void
> > > +report_zones_cb(
> > > +	struct xfs_mount	*mp,
> > > +	struct blk_zone		*zone)
> > > +{
> > > +	xfs_fsblock_t		zsbno = xfs_daddr_to_rtb(mp, zone->start);
> > 
> >         ^^^^^^^^^^^^^ nit: xfs_rtblock_t ?
> 
> Updated.
> 
> > Nit: inconsistent styles in declaration indentation
> 
> Fixed.
> 
> > > +	device_size /= 512; /* BLKGETSIZE64 reports a byte value */
> > 
> > device_size = BTOBB(device_size); ?
> 
> Sure.
> 
> > > +
> > > +			switch (zones[i].type) {
> > > +			case BLK_ZONE_TYPE_CONVENTIONAL:
> > > +			case BLK_ZONE_TYPE_SEQWRITE_REQ:
> > > +				break;
> > > +			case BLK_ZONE_TYPE_SEQWRITE_PREF:
> > > +				do_error(
> > > +_("Found sequential write preferred zone\n"));
> > 
> > I wonder, can "sequential preferred" zones be treated as if they are
> > conventional zones?  Albeit really slow ones?
> 
> Yes, they could.  However in the kernel we've decided that dealing
> them is too painful for the few prototypes build that way and reject
> them in the block layer.  So we won't ever seem them here except with
> a rather old kernel.

Ah, I hadn't realized those aren't even supported now, but yeah:

	case BLK_ZONE_TYPE_SEQWRITE_PREF:
	default:
		pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n",
			disk->disk_name, (int)zone->type, zone->start);

I guess that means drive-managed SMR disks don't export zone info at
all?

--D
diff mbox series

Patch

diff --git a/repair/Makefile b/repair/Makefile
index ff5b1f5abeda..fb0b2f96cc91 100644
--- a/repair/Makefile
+++ b/repair/Makefile
@@ -81,6 +81,7 @@  CFILES = \
 	strblobs.c \
 	threads.c \
 	versions.c \
+	zoned.c \
 	xfs_repair.c
 
 LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBXCMD) $(LIBFROG) $(LIBUUID) $(LIBRT) \
diff --git a/repair/phase5.c b/repair/phase5.c
index e350b411c243..e44c26885717 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -21,6 +21,7 @@ 
 #include "rmap.h"
 #include "bulkload.h"
 #include "agbtree.h"
+#include "zoned.h"
 
 static uint64_t	*sb_icount_ag;		/* allocated inodes per ag */
 static uint64_t	*sb_ifree_ag;		/* free inodes per ag */
@@ -631,15 +632,7 @@  check_rtmetadata(
 	struct xfs_mount	*mp)
 {
 	if (xfs_has_zoned(mp)) {
-		/*
-		 * Here we could/should verify the zone state a bit when we are
-		 * on actual zoned devices:
-		 *	- compare hw write pointer to last written
-		 *	- compare zone state to last written
-		 *
-		 * Note much we can do when running in zoned mode on a
-		 * conventional device.
-		 */
+		check_zones(mp);
 		return;
 	}
 
diff --git a/repair/zoned.c b/repair/zoned.c
new file mode 100644
index 000000000000..06b2a08dff39
--- /dev/null
+++ b/repair/zoned.c
@@ -0,0 +1,136 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2024 Christoph Hellwig.
+ */
+#include <ctype.h>
+#include <linux/blkzoned.h>
+#include "libxfs_priv.h"
+#include "libxfs.h"
+#include "xfs_zones.h"
+#include "err_protos.h"
+#include "zoned.h"
+
+/* random size that allows efficient processing */
+#define ZONES_PER_IOCTL			16384
+
+static void
+report_zones_cb(
+	struct xfs_mount	*mp,
+	struct blk_zone		*zone)
+{
+	xfs_fsblock_t		zsbno = xfs_daddr_to_rtb(mp, zone->start);
+	xfs_rgblock_t		write_pointer;
+	xfs_rgnumber_t		rgno;
+	struct xfs_rtgroup	*rtg;
+
+	if (xfs_rtb_to_rgbno(mp, zsbno) != 0) {
+		do_error(_("mismatched zone start 0x%llx."),
+				(unsigned long long)zsbno);
+		return;
+	}
+
+	rgno = xfs_rtb_to_rgno(mp, zsbno);
+	rtg = xfs_rtgroup_grab(mp, rgno);
+	if (!rtg) {
+		do_error(_("realtime group not found for zone %u."), rgno);
+		return;
+	}
+
+	if (!rtg_rmap(rtg))
+		do_warn(_("no rmap inode for zone %u."), rgno);
+	else
+		xfs_zone_validate(zone, rtg, &write_pointer);
+	xfs_rtgroup_rele(rtg);
+}
+
+void check_zones(struct xfs_mount *mp)
+{
+	int fd = mp->m_rtdev_targp->bt_bdev_fd;
+	uint64_t sector = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart);
+	unsigned int zone_size, zone_capacity;
+	struct blk_zone_report *rep;
+	unsigned int i, n = 0;
+	uint64_t device_size;
+	size_t rep_size;
+
+	if (ioctl(fd, BLKGETSIZE64, &device_size))
+		return; /* not a block device */
+	if (ioctl(fd, BLKGETZONESZ, &zone_size) || !zone_size)
+		return;	/* not zoned */
+
+	device_size /= 512; /* BLKGETSIZE64 reports a byte value */
+	if (device_size / zone_size < mp->m_sb.sb_rgcount) {
+		do_error(_("rt device too small\n"));
+		return;
+	}
+
+	rep_size = sizeof(struct blk_zone_report) +
+		   sizeof(struct blk_zone) * ZONES_PER_IOCTL;
+	rep = malloc(rep_size);
+	if (!rep) {
+		do_warn(_("malloc failed for zone report\n"));
+		return;
+	}
+
+	while (n < mp->m_sb.sb_rgcount) {
+		struct blk_zone *zones = (struct blk_zone *)(rep + 1);
+		int ret;
+
+		memset(rep, 0, rep_size);
+		rep->sector = sector;
+		rep->nr_zones = ZONES_PER_IOCTL;
+
+		ret = ioctl(fd, BLKREPORTZONE, rep);
+		if (ret) {
+			do_error(_("ioctl(BLKREPORTZONE) failed: %d!\n"), ret);
+			goto out_free;
+		}
+		if (!rep->nr_zones)
+			break;
+
+		for (i = 0; i < rep->nr_zones; i++) {
+			if (n >= mp->m_sb.sb_rgcount)
+				break;
+
+			if (zones[i].len != zone_size) {
+				do_error(_("Inconsistent zone size!\n"));
+				goto out_free;
+			}
+
+			switch (zones[i].type) {
+			case BLK_ZONE_TYPE_CONVENTIONAL:
+			case BLK_ZONE_TYPE_SEQWRITE_REQ:
+				break;
+			case BLK_ZONE_TYPE_SEQWRITE_PREF:
+				do_error(
+_("Found sequential write preferred zone\n"));
+				goto out_free;
+			default:
+				do_error(
+_("Found unknown zone type (0x%x)\n"), zones[i].type);
+				goto out_free;
+			}
+
+			if (!n) {
+				zone_capacity = zones[i].capacity;
+				if (zone_capacity > zone_size) {
+					do_error(
+_("Zone capacity larger than zone size!\n"));
+					goto out_free;
+				}
+			} else if (zones[i].capacity != zone_capacity) {
+				do_error(
+_("Inconsistent zone capacity!\n"));
+				goto out_free;
+			}
+
+			report_zones_cb(mp, &zones[i]);
+			n++;
+		}
+		sector = zones[rep->nr_zones - 1].start +
+			 zones[rep->nr_zones - 1].len;
+	}
+
+out_free:
+	free(rep);
+}
diff --git a/repair/zoned.h b/repair/zoned.h
new file mode 100644
index 000000000000..ab76bf15b3ca
--- /dev/null
+++ b/repair/zoned.h
@@ -0,0 +1,10 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2024 Christoph Hellwig.
+ */
+#ifndef _XFS_REPAIR_ZONED_H_
+#define _XFS_REPAIR_ZONED_H_
+
+void check_zones(struct xfs_mount *mp);
+
+#endif /* _XFS_REPAIR_ZONED_H_ */