diff mbox series

[2/2] xfs_io: dump reference count information

Message ID 170405019987.1820694.17452395276567415984.stgit@frogsfrogsfrogs (mailing list archive)
State Deferred, archived
Headers show
Series [1/2] xfs: export reference count information to userspace | expand

Commit Message

Darrick J. Wong Dec. 27, 2023, 1:39 p.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

Dump refcount info from the kernel so we can prototype a sharing-aware
defrag/fs rearranging tool.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 io/Makefile       |    2 
 io/fsrefcounts.c  |  477 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 io/init.c         |    1 
 io/io.h           |    1 
 man/man8/xfs_io.8 |   88 ++++++++++
 5 files changed, 568 insertions(+), 1 deletion(-)
 create mode 100644 io/fsrefcounts.c
diff mbox series

Patch

diff --git a/io/Makefile b/io/Makefile
index 32a40294358..a2faa22e1ec 100644
--- a/io/Makefile
+++ b/io/Makefile
@@ -13,7 +13,7 @@  CFILES = init.c aginfo.c \
 	file.c freeze.c fsuuid.c fsync.c getrusage.c imap.c inject.c label.c \
 	link.c mmap.c open.c parent.c pread.c prealloc.c pwrite.c reflink.c \
 	resblks.c scrub.c seek.c shutdown.c stat.c swapext.c sync.c \
-	truncate.c utimes.c atomicupdate.c
+	truncate.c utimes.c atomicupdate.c fsrefcounts.c
 
 LLDLIBS = $(LIBXCMD) $(LIBHANDLE) $(LIBFROG) $(LIBPTHREAD) $(LIBUUID)
 LTDEPENDENCIES = $(LIBXCMD) $(LIBHANDLE) $(LIBFROG)
diff --git a/io/fsrefcounts.c b/io/fsrefcounts.c
new file mode 100644
index 00000000000..2dac334b3ea
--- /dev/null
+++ b/io/fsrefcounts.c
@@ -0,0 +1,477 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "platform_defs.h"
+#include "command.h"
+#include "init.h"
+#include "libfrog/paths.h"
+#include "io.h"
+#include "input.h"
+#include "libfrog/fsgeom.h"
+
+static cmdinfo_t	fsrefcounts_cmd;
+static dev_t		xfs_data_dev;
+
+static void
+fsrefcounts_help(void)
+{
+	printf(_(
+"\n"
+" Prints extent owner counts for the filesystem hosting the current file"
+"\n"
+" fsrefcounts prints the number of owners of disk blocks used by the whole\n"
+" filesystem. When possible, owner and offset information will be included\n"
+" in the space report.\n"
+"\n"
+" By default, each line of the listing takes the following form:\n"
+"     extent: major:minor [startblock..endblock]: owner startoffset..endoffset length\n"
+" All the file offsets and disk blocks are in units of 512-byte blocks.\n"
+" -d -- query only the data device (default).\n"
+" -l -- query only the log device.\n"
+" -r -- query only the realtime device.\n"
+" -n -- query n extents at a time.\n"
+" -o -- only print extents with at least this many owners (default 1).\n"
+" -O -- only print extents with no more than this many owners (default 2^64-1).\n"
+" -m -- output machine-readable format.\n"
+" -v -- Verbose information, show AG and offsets.  Show flags legend on 2nd -v\n"
+"\n"
+"The optional start and end arguments require one of -d, -l, or -r to be set.\n"
+"\n"));
+}
+
+static void
+dump_refcounts(
+	unsigned long long		*nr,
+	const unsigned long long	min_owners,
+	const unsigned long long	max_owners,
+	struct xfs_getfsrefs_head	*head)
+{
+	unsigned long long		i;
+	struct xfs_getfsrefs		*p;
+
+	for (i = 0, p = head->fch_recs; i < head->fch_entries; i++, p++) {
+		if (p->fcr_owners < min_owners || p->fcr_owners > max_owners)
+			continue;
+		printf("\t%llu: %u:%u [%lld..%lld]: ", i + (*nr),
+			major(p->fcr_device), minor(p->fcr_device),
+			(long long)BTOBBT(p->fcr_physical),
+			(long long)BTOBBT(p->fcr_physical + p->fcr_length - 1));
+		printf(_("%llu %lld\n"),
+			(unsigned long long)p->fcr_owners,
+			(long long)BTOBBT(p->fcr_length));
+	}
+
+	(*nr) += head->fch_entries;
+}
+
+static void
+dump_refcounts_machine(
+	unsigned long long		*nr,
+	const unsigned long long	min_owners,
+	const unsigned long long	max_owners,
+	struct xfs_getfsrefs_head	*head)
+{
+	unsigned long long		i;
+	struct xfs_getfsrefs		*p;
+
+	if (*nr == 0)
+		printf(_("EXT,MAJOR,MINOR,PSTART,PEND,OWNERS,LENGTH\n"));
+	for (i = 0, p = head->fch_recs; i < head->fch_entries; i++, p++) {
+		if (p->fcr_owners < min_owners || p->fcr_owners > max_owners)
+			continue;
+		printf("%llu,%u,%u,%lld,%lld,", i + (*nr),
+			major(p->fcr_device), minor(p->fcr_device),
+			(long long)BTOBBT(p->fcr_physical),
+			(long long)BTOBBT(p->fcr_physical + p->fcr_length - 1));
+		printf("%llu,%lld\n",
+			(unsigned long long)p->fcr_owners,
+			(long long)BTOBBT(p->fcr_length));
+	}
+
+	(*nr) += head->fch_entries;
+}
+
+/*
+ * Verbose mode displays:
+ *   extent: major:minor [startblock..endblock]: owners \
+ *	ag# (agoffset..agendoffset) totalbbs flags
+ */
+#define MINRANGE_WIDTH	16
+#define MINAG_WIDTH	2
+#define MINTOT_WIDTH	5
+#define NFLG		4	/* count of flags */
+#define	FLG_NULL	00000	/* Null flag */
+#define	FLG_BSU		01000	/* Not on begin of stripe unit  */
+#define	FLG_ESU		00100	/* Not on end   of stripe unit  */
+#define	FLG_BSW		00010	/* Not on begin of stripe width */
+#define	FLG_ESW		00001	/* Not on end   of stripe width */
+static void
+dump_refcounts_verbose(
+	unsigned long long		*nr,
+	const unsigned long long	min_owners,
+	const unsigned long long	max_owners,
+	struct xfs_getfsrefs_head	*head,
+	bool				*dumped_flags,
+	struct xfs_fsop_geom		*fsgeo)
+{
+	unsigned long long		i;
+	struct xfs_getfsrefs		*p;
+	int				agno;
+	off64_t				agoff, bperag;
+	int				boff_w, aoff_w, tot_w, agno_w, own_w;
+	int				nr_w, dev_w;
+	char				bbuf[40], abuf[40], obuf[40];
+	char				nbuf[40], dbuf[40], gbuf[40];
+	int				sunit, swidth;
+	int				flg = 0;
+
+	boff_w = aoff_w = own_w = MINRANGE_WIDTH;
+	dev_w = 3;
+	nr_w = 4;
+	tot_w = MINTOT_WIDTH;
+	bperag = (off64_t)fsgeo->agblocks *
+		  (off64_t)fsgeo->blocksize;
+	sunit = (fsgeo->sunit * fsgeo->blocksize);
+	swidth = (fsgeo->swidth * fsgeo->blocksize);
+
+	/*
+	 * Go through the extents and figure out the width
+	 * needed for all columns.
+	 */
+	for (i = 0, p = head->fch_recs; i < head->fch_entries; i++, p++) {
+		if (p->fcr_owners < min_owners || p->fcr_owners > max_owners)
+			continue;
+		if (sunit &&
+		    (p->fcr_physical  % sunit != 0 ||
+		     ((p->fcr_physical + p->fcr_length) % sunit) != 0 ||
+		     p->fcr_physical % swidth != 0 ||
+		     ((p->fcr_physical + p->fcr_length) % swidth) != 0))
+			flg = 1;
+		if (flg)
+			*dumped_flags = true;
+		snprintf(nbuf, sizeof(nbuf), "%llu", (*nr) + i);
+		nr_w = max(nr_w, strlen(nbuf));
+		if (head->fch_oflags & FCH_OF_DEV_T)
+			snprintf(dbuf, sizeof(dbuf), "%u:%u",
+				major(p->fcr_device),
+				minor(p->fcr_device));
+		else
+			snprintf(dbuf, sizeof(dbuf), "0x%x", p->fcr_device);
+		dev_w = max(dev_w, strlen(dbuf));
+		snprintf(bbuf, sizeof(bbuf), "[%lld..%lld]:",
+			(long long)BTOBBT(p->fcr_physical),
+			(long long)BTOBBT(p->fcr_physical + p->fcr_length - 1));
+		boff_w = max(boff_w, strlen(bbuf));
+		snprintf(obuf, sizeof(obuf), "%llu",
+				(unsigned long long)p->fcr_owners);
+		own_w = max(own_w, strlen(obuf));
+		if (p->fcr_device == xfs_data_dev) {
+			agno = p->fcr_physical / bperag;
+			agoff = p->fcr_physical - (agno * bperag);
+			snprintf(abuf, sizeof(abuf),
+				"(%lld..%lld)",
+				(long long)BTOBBT(agoff),
+				(long long)BTOBBT(agoff + p->fcr_length - 1));
+		} else
+			abuf[0] = 0;
+		aoff_w = max(aoff_w, strlen(abuf));
+		tot_w = max(tot_w,
+			numlen(BTOBBT(p->fcr_length), 10));
+	}
+	agno_w = max(MINAG_WIDTH, numlen(fsgeo->agcount, 10));
+	if (*nr == 0)
+		printf("%*s: %-*s %-*s %-*s %*s %-*s %*s%s\n",
+			nr_w, _("EXT"),
+			dev_w, _("DEV"),
+			boff_w, _("BLOCK-RANGE"),
+			own_w, _("OWNERS"),
+			agno_w, _("AG"),
+			aoff_w, _("AG-OFFSET"),
+			tot_w, _("TOTAL"),
+			flg ? _(" FLAGS") : "");
+	for (i = 0, p = head->fch_recs; i < head->fch_entries; i++, p++) {
+		if (p->fcr_owners < min_owners || p->fcr_owners > max_owners)
+			continue;
+		flg = FLG_NULL;
+		/*
+		 * If striping enabled, determine if extent starts/ends
+		 * on a stripe unit boundary.
+		 */
+		if (sunit) {
+			if (p->fcr_physical  % sunit != 0)
+				flg |= FLG_BSU;
+			if (((p->fcr_physical +
+			      p->fcr_length ) % sunit ) != 0)
+				flg |= FLG_ESU;
+			if (p->fcr_physical % swidth != 0)
+				flg |= FLG_BSW;
+			if (((p->fcr_physical +
+			      p->fcr_length ) % swidth ) != 0)
+				flg |= FLG_ESW;
+		}
+		if (head->fch_oflags & FCH_OF_DEV_T)
+			snprintf(dbuf, sizeof(dbuf), "%u:%u",
+				major(p->fcr_device),
+				minor(p->fcr_device));
+		else
+			snprintf(dbuf, sizeof(dbuf), "0x%x", p->fcr_device);
+		snprintf(bbuf, sizeof(bbuf), "[%lld..%lld]:",
+			(long long)BTOBBT(p->fcr_physical),
+			(long long)BTOBBT(p->fcr_physical + p->fcr_length - 1));
+		snprintf(obuf, sizeof(obuf), "%llu",
+			(unsigned long long)p->fcr_owners);
+		if (p->fcr_device == xfs_data_dev) {
+			agno = p->fcr_physical / bperag;
+			agoff = p->fcr_physical - (agno * bperag);
+			snprintf(abuf, sizeof(abuf),
+				"(%lld..%lld)",
+				(long long)BTOBBT(agoff),
+				(long long)BTOBBT(agoff + p->fcr_length - 1));
+			snprintf(gbuf, sizeof(gbuf),
+				"%lld",
+				(long long)agno);
+		} else {
+			abuf[0] = 0;
+			gbuf[0] = 0;
+		}
+		printf("%*llu: %-*s %-*s %-*s %-*s %-*s %*lld",
+			nr_w, (*nr) + i, dev_w, dbuf, boff_w, bbuf, own_w,
+			obuf, agno_w, gbuf, aoff_w, abuf, tot_w,
+			(long long)BTOBBT(p->fcr_length));
+		if (flg == FLG_NULL)
+			printf("\n");
+		else
+			printf(" %-*.*o\n", NFLG, NFLG, flg);
+	}
+
+	(*nr) += head->fch_entries;
+}
+
+static void
+dump_verbose_key(void)
+{
+	printf(_(" FLAG Values:\n"));
+	printf(_("    %*.*o Doesn't begin on stripe unit\n"),
+		NFLG+1, NFLG+1, FLG_BSU);
+	printf(_("    %*.*o Doesn't end   on stripe unit\n"),
+		NFLG+1, NFLG+1, FLG_ESU);
+	printf(_("    %*.*o Doesn't begin on stripe width\n"),
+		NFLG+1, NFLG+1, FLG_BSW);
+	printf(_("    %*.*o Doesn't end   on stripe width\n"),
+		NFLG+1, NFLG+1, FLG_ESW);
+}
+
+static int
+fsrefcounts_f(
+	int			argc,
+	char			**argv)
+{
+	struct xfs_getfsrefs		*p;
+	struct xfs_getfsrefs_head	*head;
+	struct xfs_getfsrefs		*l, *h;
+	struct xfs_fsop_geom	fsgeo;
+	long long		start = 0;
+	long long		end = -1;
+	unsigned long long	min_owners = 1;
+	unsigned long long	max_owners = ULLONG_MAX;
+	int			map_size;
+	int			nflag = 0;
+	int			vflag = 0;
+	int			mflag = 0;
+	int			i = 0;
+	int			c;
+	unsigned long long	nr = 0;
+	size_t			fsblocksize, fssectsize;
+	struct fs_path		*fs;
+	static bool		tab_init;
+	bool			dumped_flags = false;
+	int			dflag, lflag, rflag;
+
+	init_cvtnum(&fsblocksize, &fssectsize);
+
+	dflag = lflag = rflag = 0;
+	while ((c = getopt(argc, argv, "dlmn:o:O:rv")) != EOF) {
+		switch (c) {
+		case 'd':	/* data device */
+			dflag = 1;
+			break;
+		case 'l':	/* log device */
+			lflag = 1;
+			break;
+		case 'm':	/* machine readable format */
+			mflag++;
+			break;
+		case 'n':	/* number of extents specified */
+			nflag = cvt_u32(optarg, 10);
+			if (errno)
+				return command_usage(&fsrefcounts_cmd);
+			break;
+		case 'o':	/* minimum owners */
+			min_owners = cvt_u64(optarg, 10);
+			if (errno)
+				return command_usage(&fsrefcounts_cmd);
+			if (min_owners < 1) {
+				fprintf(stderr,
+		_("min_owners must be greater than zero.\n"));
+				exitcode = 1;
+				return 0;
+			}
+			break;
+		case 'O':	/* maximum owners */
+			max_owners = cvt_u64(optarg, 10);
+			if (errno)
+				return command_usage(&fsrefcounts_cmd);
+			if (max_owners < 1) {
+				fprintf(stderr,
+		_("max_owners must be greater than zero.\n"));
+				exitcode = 1;
+				return 0;
+			}
+			break;
+		case 'r':	/* rt device */
+			rflag = 1;
+			break;
+		case 'v':	/* Verbose output */
+			vflag++;
+			break;
+		default:
+			exitcode = 1;
+			return command_usage(&fsrefcounts_cmd);
+		}
+	}
+
+	if ((dflag + lflag + rflag > 1) || (mflag > 0 && vflag > 0) ||
+	    (argc > optind && dflag + lflag + rflag == 0)) {
+		exitcode = 1;
+		return command_usage(&fsrefcounts_cmd);
+	}
+
+	if (argc > optind) {
+		start = cvtnum(fsblocksize, fssectsize, argv[optind]);
+		if (start < 0) {
+			fprintf(stderr,
+				_("Bad refcount start_bblock %s.\n"),
+				argv[optind]);
+			exitcode = 1;
+			return 0;
+		}
+		start <<= BBSHIFT;
+	}
+
+	if (argc > optind + 1) {
+		end = cvtnum(fsblocksize, fssectsize, argv[optind + 1]);
+		if (end < 0) {
+			fprintf(stderr,
+				_("Bad refcount end_bblock %s.\n"),
+				argv[optind + 1]);
+			exitcode = 1;
+			return 0;
+		}
+		end <<= BBSHIFT;
+	}
+
+	if (vflag) {
+		c = -xfrog_geometry(file->fd, &fsgeo);
+		if (c) {
+			fprintf(stderr,
+				_("%s: can't get geometry [\"%s\"]: %s\n"),
+				progname, file->name, strerror(c));
+			exitcode = 1;
+			return 0;
+		}
+	}
+
+	map_size = nflag ? nflag : 131072 / sizeof(struct xfs_getfsrefs);
+	head = malloc(xfs_getfsrefs_sizeof(map_size));
+	if (head == NULL) {
+		fprintf(stderr, _("%s: malloc of %llu bytes failed.\n"),
+				progname,
+				(unsigned long long)xfs_getfsrefs_sizeof(map_size));
+		exitcode = 1;
+		return 0;
+	}
+
+	memset(head, 0, sizeof(*head));
+	l = head->fch_keys;
+	h = head->fch_keys + 1;
+	if (dflag) {
+		l->fcr_device = h->fcr_device = file->fs_path.fs_datadev;
+	} else if (lflag) {
+		l->fcr_device = h->fcr_device = file->fs_path.fs_logdev;
+	} else if (rflag) {
+		l->fcr_device = h->fcr_device = file->fs_path.fs_rtdev;
+	} else {
+		l->fcr_device = 0;
+		h->fcr_device = UINT_MAX;
+	}
+	l->fcr_physical = start;
+	h->fcr_physical = end;
+	h->fcr_owners = ULLONG_MAX;
+	h->fcr_flags = UINT_MAX;
+
+	/*
+	 * If this is an XFS filesystem, remember the data device.
+	 * (We report AG number/block for data device extents on XFS).
+	 */
+	if (!tab_init) {
+		fs_table_initialise(0, NULL, 0, NULL);
+		tab_init = true;
+	}
+	fs = fs_table_lookup(file->name, FS_MOUNT_POINT);
+	xfs_data_dev = fs ? fs->fs_datadev : 0;
+
+	head->fch_count = map_size;
+	do {
+		/* Get some extents */
+		i = ioctl(file->fd, XFS_IOC_GETFSREFCOUNTS, head);
+		if (i < 0) {
+			fprintf(stderr, _("%s: xfsctl(XFS_IOC_GETFSREFCOUNTS)"
+				" iflags=0x%x [\"%s\"]: %s\n"),
+				progname, head->fch_iflags, file->name,
+				strerror(errno));
+			free(head);
+			exitcode = 1;
+			return 0;
+		}
+
+		if (head->fch_entries == 0)
+			break;
+
+		if (vflag)
+			dump_refcounts_verbose(&nr, min_owners, max_owners,
+					head, &dumped_flags, &fsgeo);
+		else if (mflag)
+			dump_refcounts_machine(&nr, min_owners, max_owners,
+					head);
+		else
+			dump_refcounts(&nr, min_owners, max_owners, head);
+
+		p = &head->fch_recs[head->fch_entries - 1];
+		if (p->fcr_flags & FCR_OF_LAST)
+			break;
+		xfs_getfsrefs_advance(head);
+	} while (true);
+
+	if (dumped_flags)
+		dump_verbose_key();
+
+	free(head);
+	return 0;
+}
+
+void
+fsrefcounts_init(void)
+{
+	fsrefcounts_cmd.name = "fsrefcounts";
+	fsrefcounts_cmd.cfunc = fsrefcounts_f;
+	fsrefcounts_cmd.argmin = 0;
+	fsrefcounts_cmd.argmax = -1;
+	fsrefcounts_cmd.flags = CMD_NOMAP_OK | CMD_FLAG_FOREIGN_OK;
+	fsrefcounts_cmd.args = _("[-d|-l|-r] [-m|-v] [-n nx] [start] [end]");
+	fsrefcounts_cmd.oneline = _("print filesystem owner counts for a range of blocks");
+	fsrefcounts_cmd.help = fsrefcounts_help;
+
+	add_command(&fsrefcounts_cmd);
+}
diff --git a/io/init.c b/io/init.c
index dee09b56333..b13847e8139 100644
--- a/io/init.c
+++ b/io/init.c
@@ -59,6 +59,7 @@  init_commands(void)
 	freeze_init();
 	fsmap_init();
 	fsuuid_init();
+	fsrefcounts_init();
 	fsync_init();
 	getrusage_init();
 	help_init();
diff --git a/io/io.h b/io/io.h
index 61920fcee7f..9b2b8c4c4ab 100644
--- a/io/io.h
+++ b/io/io.h
@@ -191,3 +191,4 @@  extern void		crc32cselftest_init(void);
 extern void		bulkstat_init(void);
 extern void		atomicupdate_init(void);
 extern void		aginfo_init(void);
+extern void		fsrefcounts_init(void);
diff --git a/man/man8/xfs_io.8 b/man/man8/xfs_io.8
index 4ae47555ae3..411144151a1 100644
--- a/man/man8/xfs_io.8
+++ b/man/man8/xfs_io.8
@@ -1322,6 +1322,94 @@  Only available in expert mode and requires privileges.
 .B thaw
 Undo the effects of a filesystem freeze operation.
 Only available in expert mode and requires privileges.
+
+.TP
+.BI "fsrefcounts [ \-d | \-l | \-r ] [ \-m | \-v ] [ \-n " nx " ] [ \-o " min_owners " ] [ \-O " max_owners " ] [ " start " ] [ " end " ]
+Prints the number of owners of disk extents used by the filesystem hosting the
+current file.
+The listing does not include free blocks.
+Each line of the listings takes the following form:
+.PP
+.RS
+.IR extent ": " major ":" minor " [" startblock .. endblock "]: " owners " " length
+.PP
+All blocks, offsets, and lengths are specified in units of 512-byte
+blocks, no matter what the filesystem's block size is.
+The optional
+.I start
+and
+.I end
+arguments can be used to constrain the output to a particular range of
+disk blocks.
+If these two options are specified, exactly one of
+.BR "-d" ", " "-l" ", or " "-r"
+must also be set.
+.RE
+.RS 1.0i
+.PD 0
+.TP
+.BI \-d
+Display only extents from the data device.
+This option only applies for XFS filesystems.
+.TP
+.BI \-l
+Display only extents from the external log device.
+This option only applies to XFS filesystems.
+.TP
+.BI \-r
+Display only extents from the realtime device.
+This option only applies to XFS filesystems.
+.TP
+.BI \-m
+Display results in a machine readable format (CSV).
+This option is not compatible with the
+.B \-v
+flag.
+The columns of the output are: extent number, device major, device minor,
+physical start, physical end, number of owners, length.
+The start, end, and length numbers are provided in units of 512b.
+
+.TP
+.BI \-n " num_extents"
+If this option is given,
+.B fsrefcounts
+obtains the extent list of the file in groups of
+.I num_extents
+extents.
+In the absence of
+.BR "-n" ", " "fsrefcounts"
+queries the system for extents in groups of 131,072 records.
+.TP
+.BI \-o " min_owners"
+Only print extents having at least this many owners.
+This argument must be in the range 1 to 2^64-1.
+The default value is 1.
+.TP
+.BI \-O " max_owners"
+Only print extents having this many or fewer owners.
+This argument must be in the range 1 to 2^64-1.
+There is no limit by default.
+.TP
+.B \-v
+Shows verbose information.
+When this flag is specified, additional AG specific information is
+appended to each line in the following form:
+.IP
+.RS 1.2i
+.IR agno " (" startagblock .. endagblock ") " nblocks " " flags
+.RE
+.IP
+A second
+.B \-v
+option will print out the
+.I flags
+legend.
+This option is not compatible with the
+.B \-m
+flag.
+.RE
+.PD
+
 .TP
 .BI "inject [ " tag " ]"
 Inject errors into a filesystem to observe filesystem behavior at