diff mbox series

[RFC,v2,2/3] xfs: distinguish between inobt and finobt magic values

Message ID 20190128152034.21080-3-bfoster@redhat.com (mailing list archive)
State Superseded
Headers show
Series xfs: fix [f]inobt magic value verification | expand

Commit Message

Brian Foster Jan. 28, 2019, 3:20 p.m. UTC
The inode btree verifier code is shared between the inode btree and
free inode btree because the underlying metadata formats are
essentially equivalent. A side effect of this is that the verifier
cannot determine whether a particular btree block should have an
inobt or finobt magic value.

This logic allows an unfortunate xfs_repair bug to escape detection
where certain level > 0 nodes of the finobt are stamped with inobt
magic by xfs_repair finobt reconstruction. This is fortunately not a
severe problem since the inode btree magic values do not contribute
to any changes in kernel behavior, but we do need a means to detect
and prevent this problem in the future.

Add a field to xfs_buf_ops to store the v4 and v5 superblock magic
values expected by a particular verifier. Add a helper to check an
on-disk magic value against the value expected by the verifier. Call
the helper from the shared [f]inobt verifier code for magic value
verification. This ensures that the inode btree blocks each have the
appropriate magic value based on specific tree type and superblock
version.

Signed-off-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_ialloc_btree.c | 15 ++++++---------
 fs/xfs/xfs_buf.h                 | 19 +++++++++++++++++++
 2 files changed, 25 insertions(+), 9 deletions(-)

Comments

Dave Chinner Jan. 28, 2019, 10:54 p.m. UTC | #1
On Mon, Jan 28, 2019 at 10:20:33AM -0500, Brian Foster wrote:
> The inode btree verifier code is shared between the inode btree and
> free inode btree because the underlying metadata formats are
> essentially equivalent. A side effect of this is that the verifier
> cannot determine whether a particular btree block should have an
> inobt or finobt magic value.
> 
> This logic allows an unfortunate xfs_repair bug to escape detection
> where certain level > 0 nodes of the finobt are stamped with inobt
> magic by xfs_repair finobt reconstruction. This is fortunately not a
> severe problem since the inode btree magic values do not contribute
> to any changes in kernel behavior, but we do need a means to detect
> and prevent this problem in the future.
> 
> Add a field to xfs_buf_ops to store the v4 and v5 superblock magic
> values expected by a particular verifier. Add a helper to check an
> on-disk magic value against the value expected by the verifier. Call
> the helper from the shared [f]inobt verifier code for magic value
> verification. This ensures that the inode btree blocks each have the
> appropriate magic value based on specific tree type and superblock
> version.

I still really don't like this code :(

> @@ -387,4 +388,22 @@ extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int);
>  
>  int xfs_buf_ensure_ops(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
>  
> +/*
> + * Verify an on-disk magic value against the magic value specified in the
> + * verifier structure.
> + */
> +static inline bool
> +xfs_buf_ops_verify_magic(
> +	struct xfs_buf		*bp,
> +	__be32			dmagic,
> +	bool			crc)
> +{
> +	if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic[crc])))
> +		return false;
> +	return dmagic == cpu_to_be32(bp->b_ops->magic[crc]);
> +}
> +#define xfs_verify_magic(bp, dmagic)		\
> +	xfs_buf_ops_verify_magic(bp, dmagic,	\
> +			xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))

That, IMO, is even worse....

Ok, here's a different option. Store all the magic numbers in a pair
of tables - one for v4, one for v5. They can be static const and
in on-disk format.

Then use some simple 1-line wrappers for the verifier definitions to
specify the table index for the magic numbers. e.g:

__be32 xfs_disk_magic(mp, idx)
{
	if (xfs_sb_version_hascrc(&mp->m_sb))
		return xfs_v5_disk_magic[idx];
	return xfs_v4_disk_magic[idx];
}

[.....]

__xfs_inobt_read_verify(bp, magic_idx)
{
	magic = xfs_disk_magic(mp, magic_idx);
	.....
}

__xfs_inobt_write_verify(bp, magic_idx)
{
	magic = xfs_disk_magic(mp, magic_idx);
	.....
}

__xfs_inobt_struct_verify(bp, magic_idx)
{
	magic = xfs_disk_magic(mp, magic_idx);
	.....
}

[ or drive the magic number resolution further inwards to where it
is actually needed. ]

xfs_inobt_read_verify(bp)
{
	return __xfs_inobt_read_verify(bp, INOBT);
}

xfs_inobt_write_verify(bp)
{
	return __xfs_inobt_write_verify(bp, INOBT);
}

xfs_inobt_struct_verify(bp)
{
	return __xfs_inobt_struct_verify(bp, INOBT);
}

xfs_finobt_read_verify(bp)
{
	return __xfs_inobt_read_verify(bp, FINOBT);
}

xfs_finobt_write_verify(bp)
{
	return __xfs_inobt_write_verify(bp, FINOBT);
}

xfs_finobt_struct_verify(bp)
{
	return __xfs_inobt_struct_verify(bp, FINOBT);
}

And this can be extended to all the verifiers - it handles crc and
non CRC variants transparently, and can be used for the cnt/bno free
space btrees, too.

Yes, it's a bit more boiler plate code, but IMO it is easier to
follow and understand than encoding multiple magic numbers into the
verifier and adding a dependency on the buffer having an ops
structure attached to be able to check the magic number...

Cheers,

Dave.
Brian Foster Jan. 29, 2019, 2:01 p.m. UTC | #2
On Tue, Jan 29, 2019 at 09:54:26AM +1100, Dave Chinner wrote:
> On Mon, Jan 28, 2019 at 10:20:33AM -0500, Brian Foster wrote:
> > The inode btree verifier code is shared between the inode btree and
> > free inode btree because the underlying metadata formats are
> > essentially equivalent. A side effect of this is that the verifier
> > cannot determine whether a particular btree block should have an
> > inobt or finobt magic value.
> > 
> > This logic allows an unfortunate xfs_repair bug to escape detection
> > where certain level > 0 nodes of the finobt are stamped with inobt
> > magic by xfs_repair finobt reconstruction. This is fortunately not a
> > severe problem since the inode btree magic values do not contribute
> > to any changes in kernel behavior, but we do need a means to detect
> > and prevent this problem in the future.
> > 
> > Add a field to xfs_buf_ops to store the v4 and v5 superblock magic
> > values expected by a particular verifier. Add a helper to check an
> > on-disk magic value against the value expected by the verifier. Call
> > the helper from the shared [f]inobt verifier code for magic value
> > verification. This ensures that the inode btree blocks each have the
> > appropriate magic value based on specific tree type and superblock
> > version.
> 
> I still really don't like this code :(
> 

Enough to explain why, perhaps?

> > @@ -387,4 +388,22 @@ extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int);
> >  
> >  int xfs_buf_ensure_ops(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
> >  
> > +/*
> > + * Verify an on-disk magic value against the magic value specified in the
> > + * verifier structure.
> > + */
> > +static inline bool
> > +xfs_buf_ops_verify_magic(
> > +	struct xfs_buf		*bp,
> > +	__be32			dmagic,
> > +	bool			crc)
> > +{
> > +	if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic[crc])))
> > +		return false;
> > +	return dmagic == cpu_to_be32(bp->b_ops->magic[crc]);
> > +}
> > +#define xfs_verify_magic(bp, dmagic)		\
> > +	xfs_buf_ops_verify_magic(bp, dmagic,	\
> > +			xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
> 
> That, IMO, is even worse....
> 

Worse than what and why?

Note that I've removed the endian conversion from here. Otherwise, this
is basically just a wrapper to factor out the sb version lookup and
provide some common error checking.

> Ok, here's a different option. Store all the magic numbers in a pair
> of tables - one for v4, one for v5. They can be static const and
> in on-disk format.
> 
> Then use some simple 1-line wrappers for the verifier definitions to
> specify the table index for the magic numbers. e.g:
> 
> __be32 xfs_disk_magic(mp, idx)
> {
> 	if (xfs_sb_version_hascrc(&mp->m_sb))
> 		return xfs_v5_disk_magic[idx];
> 	return xfs_v4_disk_magic[idx];
> }
> 

Seems reasonable enough... but where/how is the index encoded?

> [.....]
> 
> __xfs_inobt_read_verify(bp, magic_idx)
> {
> 	magic = xfs_disk_magic(mp, magic_idx);
> 	.....
> }
> 
> __xfs_inobt_write_verify(bp, magic_idx)
> {
> 	magic = xfs_disk_magic(mp, magic_idx);
> 	.....
> }
> 
> __xfs_inobt_struct_verify(bp, magic_idx)
> {
> 	magic = xfs_disk_magic(mp, magic_idx);
> 	.....
> }
> 
> [ or drive the magic number resolution further inwards to where it
> is actually needed. ]
> 
> xfs_inobt_read_verify(bp)
> {
> 	return __xfs_inobt_read_verify(bp, INOBT);
> }
> 
> xfs_inobt_write_verify(bp)
> {
> 	return __xfs_inobt_write_verify(bp, INOBT);
> }
> 
> xfs_inobt_struct_verify(bp)
> {
> 	return __xfs_inobt_struct_verify(bp, INOBT);
> }
> 
> xfs_finobt_read_verify(bp)
> {
> 	return __xfs_inobt_read_verify(bp, FINOBT);
> }
> 
> xfs_finobt_write_verify(bp)
> {
> 	return __xfs_inobt_write_verify(bp, FINOBT);
> }
> 
> xfs_finobt_struct_verify(bp)
> {
> 	return __xfs_inobt_struct_verify(bp, FINOBT);
> }
> 
> And this can be extended to all the verifiers - it handles crc and
> non CRC variants transparently, and can be used for the cnt/bno free
> space btrees, too.
> 
> Yes, it's a bit more boiler plate code, but IMO it is easier to
> follow and understand than encoding multiple magic numbers into the
> verifier and adding a dependency on the buffer having an ops
> structure attached to be able to check the magic number...
> 

This code duplication is what I was hoping to avoid. We already have
similar proliferation of boilerplate code in some of the verifiers that
handle multiple object types. See the appended hunk related to the dir
leaf verifier code, for example.

I agree that the magic value itself is a bit obfuscated with this
change, but that's still the case with a lookup table. I disagree that
an indirected magic value is more difficult to read than nearly a screen
full of similarly named verifier functions. The magic value is a simple
data value, the code above makes it unnecessarily more confusing (at a
glance) to grok which verifiers run which checks. That's just my
experience from folding the code below, fwiw.

Another angle to this is that we don't necessarily have to use the
xfs_buf_ops->magic field for every verifier. I could just add it to the
finobt case, perhaps the directory case below, and leave the rest alone
until we come up with something more agreeable. Then it basically just
supports a couple corner cases and is easy enough to remove down the
road.

Brian

--- 8< ---

diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index 1728a3e6f5cf..f602307d2fa0 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -142,41 +142,32 @@ xfs_dir3_leaf_check_int(
  */
 static xfs_failaddr_t
 xfs_dir3_leaf_verify(
-	struct xfs_buf		*bp,
-	uint16_t		magic)
+	struct xfs_buf		*bp)
 {
 	struct xfs_mount	*mp = bp->b_target->bt_mount;
 	struct xfs_dir2_leaf	*leaf = bp->b_addr;
 
-	ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
+	if (!xfs_verify_magic(bp, be16_to_cpu(leaf->hdr.info.magic)))
+		return __this_address;
 
 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
 		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
-		uint16_t		magic3;
 
-		magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC
-							 : XFS_DIR3_LEAFN_MAGIC;
-
-		if (leaf3->info.hdr.magic != cpu_to_be16(magic3))
-			return __this_address;
+		ASSERT(leaf3->info.hdr.magic == leaf->hdr.info.magic);
 		if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid))
 			return __this_address;
 		if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
 			return __this_address;
 		if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn)))
 			return __this_address;
-	} else {
-		if (leaf->hdr.info.magic != cpu_to_be16(magic))
-			return __this_address;
 	}
 
 	return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf);
 }
 
 static void
-__read_verify(
-	struct xfs_buf  *bp,
-	uint16_t	magic)
+xfs_dir3_leaf_read_verify(
+	struct xfs_buf  *bp)
 {
 	struct xfs_mount	*mp = bp->b_target->bt_mount;
 	xfs_failaddr_t		fa;
@@ -185,23 +176,22 @@ __read_verify(
 	     !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF))
 		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
 	else {
-		fa = xfs_dir3_leaf_verify(bp, magic);
+		fa = xfs_dir3_leaf_verify(bp);
 		if (fa)
 			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
 	}
 }
 
 static void
-__write_verify(
-	struct xfs_buf  *bp,
-	uint16_t	magic)
+xfs_dir3_leaf_write_verify(
+	struct xfs_buf  *bp)
 {
 	struct xfs_mount	*mp = bp->b_target->bt_mount;
 	struct xfs_buf_log_item	*bip = bp->b_log_item;
 	struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
 	xfs_failaddr_t		fa;
 
-	fa = xfs_dir3_leaf_verify(bp, magic);
+	fa = xfs_dir3_leaf_verify(bp);
 	if (fa) {
 		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
 		return;
@@ -216,60 +206,20 @@ __write_verify(
 	xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF);
 }
 
-static xfs_failaddr_t
-xfs_dir3_leaf1_verify(
-	struct xfs_buf	*bp)
-{
-	return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAF1_MAGIC);
-}
-
-static void
-xfs_dir3_leaf1_read_verify(
-	struct xfs_buf	*bp)
-{
-	__read_verify(bp, XFS_DIR2_LEAF1_MAGIC);
-}
-
-static void
-xfs_dir3_leaf1_write_verify(
-	struct xfs_buf	*bp)
-{
-	__write_verify(bp, XFS_DIR2_LEAF1_MAGIC);
-}
-
-static xfs_failaddr_t
-xfs_dir3_leafn_verify(
-	struct xfs_buf	*bp)
-{
-	return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAFN_MAGIC);
-}
-
-static void
-xfs_dir3_leafn_read_verify(
-	struct xfs_buf	*bp)
-{
-	__read_verify(bp, XFS_DIR2_LEAFN_MAGIC);
-}
-
-static void
-xfs_dir3_leafn_write_verify(
-	struct xfs_buf	*bp)
-{
-	__write_verify(bp, XFS_DIR2_LEAFN_MAGIC);
-}
-
 const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = {
 	.name = "xfs_dir3_leaf1",
-	.verify_read = xfs_dir3_leaf1_read_verify,
-	.verify_write = xfs_dir3_leaf1_write_verify,
-	.verify_struct = xfs_dir3_leaf1_verify,
+	.magic = { XFS_DIR2_LEAF1_MAGIC, XFS_DIR3_LEAF1_MAGIC },
+	.verify_read = xfs_dir3_leaf_read_verify,
+	.verify_write = xfs_dir3_leaf_write_verify,
+	.verify_struct = xfs_dir3_leaf_verify,
 };
 
 const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
 	.name = "xfs_dir3_leafn",
-	.verify_read = xfs_dir3_leafn_read_verify,
-	.verify_write = xfs_dir3_leafn_write_verify,
-	.verify_struct = xfs_dir3_leafn_verify,
+	.magic = { XFS_DIR2_LEAFN_MAGIC, XFS_DIR3_LEAFN_MAGIC },
+	.verify_read = xfs_dir3_leaf_read_verify,
+	.verify_write = xfs_dir3_leaf_write_verify,
+	.verify_struct = xfs_dir3_leaf_verify,
 };
 
 int
Dave Chinner Jan. 29, 2019, 9:16 p.m. UTC | #3
On Tue, Jan 29, 2019 at 09:01:36AM -0500, Brian Foster wrote:
> On Tue, Jan 29, 2019 at 09:54:26AM +1100, Dave Chinner wrote:
> > On Mon, Jan 28, 2019 at 10:20:33AM -0500, Brian Foster wrote:
> > > The inode btree verifier code is shared between the inode btree and
> > > free inode btree because the underlying metadata formats are
> > > essentially equivalent. A side effect of this is that the verifier
> > > cannot determine whether a particular btree block should have an
> > > inobt or finobt magic value.
> > > 
> > > This logic allows an unfortunate xfs_repair bug to escape detection
> > > where certain level > 0 nodes of the finobt are stamped with inobt
> > > magic by xfs_repair finobt reconstruction. This is fortunately not a
> > > severe problem since the inode btree magic values do not contribute
> > > to any changes in kernel behavior, but we do need a means to detect
> > > and prevent this problem in the future.
> > > 
> > > Add a field to xfs_buf_ops to store the v4 and v5 superblock magic
> > > values expected by a particular verifier. Add a helper to check an
> > > on-disk magic value against the value expected by the verifier. Call
> > > the helper from the shared [f]inobt verifier code for magic value
> > > verification. This ensures that the inode btree blocks each have the
> > > appropriate magic value based on specific tree type and superblock
> > > version.
> > 
> > I still really don't like this code :(
> > 
> 
> Enough to explain why, perhaps?

I did in the past thread - it adds runtime overhead in performance
critical paths, and it requires verifiers to have a dependency on
bp->b_ops being set.

> > > @@ -387,4 +388,22 @@ extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int);
> > >  
> > >  int xfs_buf_ensure_ops(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
> > >  
> > > +/*
> > > + * Verify an on-disk magic value against the magic value specified in the
> > > + * verifier structure.
> > > + */
> > > +static inline bool
> > > +xfs_buf_ops_verify_magic(
> > > +	struct xfs_buf		*bp,
> > > +	__be32			dmagic,
> > > +	bool			crc)
> > > +{
> > > +	if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic[crc])))
> > > +		return false;
> > > +	return dmagic == cpu_to_be32(bp->b_ops->magic[crc]);
> > > +}
> > > +#define xfs_verify_magic(bp, dmagic)		\
> > > +	xfs_buf_ops_verify_magic(bp, dmagic,	\
> > > +			xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
> > 
> > That, IMO, is even worse....
> > 
> 
> Worse than what and why?

Worse than the last patch, because it now adds a needless macro that
only serves to obfuscate the code. This:

/*
 * Compare an on-disk magic value against the expected magic for table entry
 * @idx. The v5 table is used when the superblock reports CRC support,
 * otherwise the v4 table is used. Both operands are compared as __be32
 * values, so no endian conversion is done here.
 */
static inline bool
xfs_verify_magic(
	struct xfs_mount	*mp,
	__be32			dmagic,
	int			idx)
{
	__be32			magic;

	/* Pick exactly one table; without the else the v4 value always won. */
	if (xfs_sb_version_hascrc(&mp->m_sb))
		magic = xfs_v5_disk_magic[idx];
	else
		magic = xfs_v4_disk_magic[idx];

	return dmagic == magic;
}

is much cleaner and easier to understand....

> Note that I've removed the endian conversion from here. Otherwise, this
> is basically just a wrapper to factor out the sb version lookup and
> provide some common error checking.
> 
> > Ok, here's a different option. Store all the magic numbers in a pair
> > of tables - one for v4, one for v5. They can be static const and
> > in on-disk format.
> > 
> > Then use some simple 1-line wrappers for the verifier definitions to
> > specify the table index for the magic numbers. e.g:
> > 
> > __be32 xfs_disk_magic(mp, idx)
> > {
> > 	if (xfs_sb_version_hascrc(&mp->m_sb))
> > 		return xfs_v5_disk_magic[idx];
> > 	return xfs_v4_disk_magic[idx];
> > }
> > 
> 
> Seems reasonable enough... but where/how is the index encoded?

I was thinking in fs/xfs/libxfs/xfs_types.[ch], via an index similar
to xfs_btnum_t indexes (could even use it to begin with).

static const xfs_v5_disk_magic[] = {
	cpu_to_be32(XFS_ABTB_CRC_MAGIC),
	cpu_to_be32(XFS_ABTC_CRC_MAGIC),
	cpu_to_be32(XFS_ITB_CRC_MAGIC),
	cpu_to_be32(XFS_FITB_CRC_MAGIC),
	.....
}

You could do the same thing to the verifier op definition to
remove the need for on-the-fly endian conversion just for the magic
number checks, which gets rid of that concern.

> > And this can be extended to all the verifiers - it handles crc and
> > non CRC variants transparently, and can be used for the cnt/bno free
> > space btrees, too.
> > 
> > Yes, it's a bit more boiler plate code, but IMO it is easier to
> > follow and understand than encoding multiple magic numbers into the
> > verifier and adding a dependency on the buffer having an ops
> > structure attached to be able to check the magic number...
> 
> This code duplication is what I was hoping to avoid. We already have
> similar proliferation of boilerplate code in some of the verifiers that
> handle multiple object types. See the appended hunk related to the dir
> leaf verifier code, for example.

Personally I prefer code duplication first, then factor later once
the code settles down. In hindsight, we've probably factored the
verifiers too much too soon...

> I agree that the magic value itself is a bit obfuscated with this
> change, but that's still the case with a lookup table.

The difference with the lookup table is that you know what the magic
number is supposed to be by looking at the code that calls it...

> Another angle to this is that we don't necessarily have to use the
> xfs_buf_ops->magic field for every verifier. I could just add it to the
> finobt case, perhaps the directory case below, and leave the rest alone
> until we come up with something more agreeable. Then it basically just
> supports a couple corner cases and is easy enough to remove down the
> road.

I'd like all the verifiers to use the same mechanism so we maintain
consistency between them.

> --- 8< ---
> 
> diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
> index 1728a3e6f5cf..f602307d2fa0 100644
> --- a/fs/xfs/libxfs/xfs_dir2_leaf.c
> +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
> @@ -142,41 +142,32 @@ xfs_dir3_leaf_check_int(
>   */
>  static xfs_failaddr_t
>  xfs_dir3_leaf_verify(
> -	struct xfs_buf		*bp,
> -	uint16_t		magic)
> +	struct xfs_buf		*bp)
>  {
>  	struct xfs_mount	*mp = bp->b_target->bt_mount;
>  	struct xfs_dir2_leaf	*leaf = bp->b_addr;
>  
> -	ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
> +	if (!xfs_verify_magic(bp, be16_to_cpu(leaf->hdr.info.magic)))
> +		return __this_address;
>  
>  	if (xfs_sb_version_hascrc(&mp->m_sb)) {
>  		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
> -		uint16_t		magic3;
>  
> -		magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC
> -							 : XFS_DIR3_LEAFN_MAGIC;
> -
> -		if (leaf3->info.hdr.magic != cpu_to_be16(magic3))
> -			return __this_address;
> +		ASSERT(leaf3->info.hdr.magic == leaf->hdr.info.magic);
>  		if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid))
>  			return __this_address;
>  		if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
>  			return __this_address;
>  		if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn)))
>  			return __this_address;
> -	} else {
> -		if (leaf->hdr.info.magic != cpu_to_be16(magic))
> -			return __this_address;
>  	}
>  
>  	return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf);
>  }

.....

Ok, that removes a lot more existing code than I ever thought it
would. If you clean up the macro mess and use encoded magic numbers
in the ops structure, then consider my objections removed. :)

(And that then leads to factoring of xfs_dablk_info_verify() as dir
leaf, danode and attribute leaf blocks all use the same struct
xfs_da3_blkinfo header, and now the magic number is abstracted they
can use the same code....)

Brian, to help prevent stupid people like me wasting your time in
future, can you post the entire patch set you have so we can see the
same picture you have for the overall change, even if there's only a
small chunk you are proposing for merge? That way we'll be able to
judge the change on the merits of the entire work, rather than just
the small chunk that was posted? 

Cheers,

Dave.
Brian Foster Jan. 30, 2019, 1:05 a.m. UTC | #4
On Wed, Jan 30, 2019 at 08:16:55AM +1100, Dave Chinner wrote:
> On Tue, Jan 29, 2019 at 09:01:36AM -0500, Brian Foster wrote:
> > On Tue, Jan 29, 2019 at 09:54:26AM +1100, Dave Chinner wrote:
> > > On Mon, Jan 28, 2019 at 10:20:33AM -0500, Brian Foster wrote:
> > > > The inode btree verifier code is shared between the inode btree and
> > > > free inode btree because the underlying metadata formats are
> > > > essentially equivalent. A side effect of this is that the verifier
> > > > cannot determine whether a particular btree block should have an
> > > > inobt or finobt magic value.
> > > > 
> > > > This logic allows an unfortunate xfs_repair bug to escape detection
> > > > where certain level > 0 nodes of the finobt are stamped with inobt
> > > > magic by xfs_repair finobt reconstruction. This is fortunately not a
> > > > severe problem since the inode btree magic values do not contribute
> > > > to any changes in kernel behavior, but we do need a means to detect
> > > > and prevent this problem in the future.
> > > > 
> > > > Add a field to xfs_buf_ops to store the v4 and v5 superblock magic
> > > > values expected by a particular verifier. Add a helper to check an
> > > > on-disk magic value against the value expected by the verifier. Call
> > > > the helper from the shared [f]inobt verifier code for magic value
> > > > verification. This ensures that the inode btree blocks each have the
> > > > appropriate magic value based on specific tree type and superblock
> > > > version.
> > > 
> > > I still really don't like this code :(
> > > 
> > 
> > Enough to explain why, perhaps?
> 
> I did in the past thread - it adds runtime overhead in performance
> critical paths, and it requires verfiers to have a dependecy on
> bp->b_ops being set.
> 

Fair points, but seem like nits to me when you consider the unfortunate
lack of decent alternatives. And the ->b_ops thing is really just a
happenstance bit of scrub logic that needs to be tweaked.

> > > > @@ -387,4 +388,22 @@ extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int);
> > > >  
> > > >  int xfs_buf_ensure_ops(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
> > > >  
> > > > +/*
> > > > + * Verify an on-disk magic value against the magic value specified in the
> > > > + * verifier structure.
> > > > + */
> > > > +static inline bool
> > > > +xfs_buf_ops_verify_magic(
> > > > +	struct xfs_buf		*bp,
> > > > +	__be32			dmagic,
> > > > +	bool			crc)
> > > > +{
> > > > +	if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic[crc])))
> > > > +		return false;
> > > > +	return dmagic == cpu_to_be32(bp->b_ops->magic[crc]);
> > > > +}
> > > > +#define xfs_verify_magic(bp, dmagic)		\
> > > > +	xfs_buf_ops_verify_magic(bp, dmagic,	\
> > > > +			xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
> > > 
> > > That, IMO, is even worse....
> > > 
> > 
> > Worse than what and why?
> 
> Worse that the last patch, because it now adds a needless macro that
> only serves to obfuscate the code. This:
> 

That is easy enough to address (using your logic below) regardless of
how we access the magic value.

> static inline bool
> xfs_verify_magic(
> 	struct xfs_mount	*mp,
> 	__be32			dmagic,
> 	int			idx)
> {
> 	__be32			magic;
> 
> 	if (xfs_sb_version_hascrc(&mp->m_sb))
> 		magic = xfs_v5_disk_magic[idx];
> 	magic = xfs_v4_disk_magic[idx];
> 
> 	return dmagic == magic;
> }
> 
> is much cleaner and easier to understand....
> 
> > Note that I've removed the endian conversion from here. Otherwise, this
> > is basically just a wrapper to factor out the sb version lookup and
> > provide some common error checking.
> > 
> > > Ok, here's a different option. Store all the magic numbers in a pair
> > > of tables - one for v4, one for v5. They can be static const and
> > > in on-disk format.
> > > 
> > > Then use some simple 1-line wrappers for the verifier definitions to
> > > specify the table index for the magic numbers. e.g:
> > > 
> > > __be32 xfs_disk_magic(mp, idx)
> > > {
> > > 	if (xfs_sb_version_hascrc(&mp->m_sb))
> > > 		return xfs_v5_disk_magic[idx];
> > > 	return xfs_v4_disk_magic[idx];
> > > }
> > > 
> > 
> > Seems reasonable enough... but where/how is the index encoded?
> 
> I was thinking in fs/xfs/libxfs/xfs_types.[ch], via an index similar
> to xfs_btnum_t indexes (could even use it to begin with).
> 
> static const xfs_v5_disk_magic[] = {
> 	cpu_to_be32(XFS_ABTB_CRC_MAGIC),
> 	cpu_to_be32(XFS_ABTC_CRC_MAGIC),
> 	cpu_to_be32(XFS_ITB_CRC_MAGIC),
> 	cpu_to_be32(XFS_FITB_CRC_MAGIC),
> 	.....
> }
> 
> You could do the same thing to the verfier op definition to
> remove the need on-the-fly endian conversion just for the magic
> number checks, which gets rid of that concern.
> 
> > > And this can be extended to all the verifiers - it handles crc and
> > > non CRC variants transparently, and can be used for the cnt/bno free
> > > space btrees, too.
> > > 
> > > Yes, it's a bit more boiler plate code, but IMO it is easier to
> > > follow and understand than encoding multiple magic numbers into the
> > > verifier and adding a dependency on the buffer having an ops
> > > structure attached to be able to check the magic number...
> > 
> > This code duplication is what I was hoping to avoid. We already have
> > similar proliferation of boilerplate code in some of the verifiers that
> > handle multiple object types. See the appended hunk related to the dir
> > leaf verifier code, for example.
> 
> Personally I prefer code duplication first, then factor later once
> the code settles down. In hindsight, we've probably factored the
> verifiers too much too soon...
> 
> > I agree that the magic value itself is a bit obfuscated with this
> > change, but that's still the case with a lookup table.
> 
> The difference with the lookup table is that you know what the magic
> number is supposed to be by looking at the code that calls it...
> 

Indeed. What I didn't realize until later today is that some verifiers
(xfs_sb_buf_ops, xfs_attr3_leaf_buf_ops, xfs_da3_node_buf_ops) check
already converted in-core structures and thus actually verify against
cpu endian magic values. This means said verifiers would require further
tweaks to either check the underlying buffer, another conversion back to
disk endian, or we'd otherwise need four of these arrays. :/

> > Another angle to this is that we don't necessarily have to use the
> > xfs_buf_ops->magic field for every verifier. I could just add it to the
> > finobt case, perhaps the directory case below, and leave the rest alone
> > until we come up with something more agreeable. Then it basically just
> > supports a couple corner cases and is easy enough to remove down the
> > road.
> 
> I'd like all the verifiers to use the same mechanism so we maintain
> consistency between them.
> 

I'd like that too, but I think we need to make some kind of tradeoff or
compromise to fix this problem given the current, rather ad-hoc nature
of the verifier code. Some check in-core structs, some don't and may or
may not use the compile time conversion optimization.

> > --- 8< ---
> > 
> > diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
> > index 1728a3e6f5cf..f602307d2fa0 100644
> > --- a/fs/xfs/libxfs/xfs_dir2_leaf.c
> > +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
> > @@ -142,41 +142,32 @@ xfs_dir3_leaf_check_int(
> >   */
> >  static xfs_failaddr_t
> >  xfs_dir3_leaf_verify(
> > -	struct xfs_buf		*bp,
> > -	uint16_t		magic)
> > +	struct xfs_buf		*bp)
> >  {
> >  	struct xfs_mount	*mp = bp->b_target->bt_mount;
> >  	struct xfs_dir2_leaf	*leaf = bp->b_addr;
> >  
> > -	ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
> > +	if (!xfs_verify_magic(bp, be16_to_cpu(leaf->hdr.info.magic)))
> > +		return __this_address;
> >  
> >  	if (xfs_sb_version_hascrc(&mp->m_sb)) {
> >  		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
> > -		uint16_t		magic3;
> >  
> > -		magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC
> > -							 : XFS_DIR3_LEAFN_MAGIC;
> > -
> > -		if (leaf3->info.hdr.magic != cpu_to_be16(magic3))
> > -			return __this_address;
> > +		ASSERT(leaf3->info.hdr.magic == leaf->hdr.info.magic);
> >  		if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid))
> >  			return __this_address;
> >  		if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
> >  			return __this_address;
> >  		if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn)))
> >  			return __this_address;
> > -	} else {
> > -		if (leaf->hdr.info.magic != cpu_to_be16(magic))
> > -			return __this_address;
> >  	}
> >  
> >  	return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf);
> >  }
> 
> .....
> 
> Ok, that removes a lot more existing code than I ever thought it
> would. If you clean up the macro mess and use encoded magic numbers
> in the ops structure, then consider my objections removed. :)
> 

I'll kill off the macro..

By encoded, I assume you mean on-disk order(?). Given that some
verifiers use the cpu endian value, I thought it more clear for the
helper to expect a cpu endian value. We could technically store any
endian we want, including different endian on a per verifier basis and
pass the values all the way through, but I'd find that rather confusing
(and a nightmare to review and maintain).

> (And that then leads to factoring of xfs_dablk_info_verify() as dir
> leaf, danode and attribute leaf blocks all use the same struct
> xfs_da3_blkinfo header, and now the magic number is abstracted they
> can use the same code....)
> 

Not sure I follow..?

> Brian, to help prevent stupid people like me wasting your time in
> future, can you post the entire patch set you have so we can see the
> same picture you have for the overall change, even if there's only a
> small chunk you are proposing for merge? That way we'll be able to
> judge the change on the merits of the entire work, rather than just
> the small chunk that was posted? 
> 

That was the entire patchset at the time. ;) I intentionally made the
isolated finobt change and posted that to try and get big picture
feedback before making mechanical changes to the rest of the verifiers.
I probably had most of the rest done shortly after posting the rfcv2,
but it wasn't tested until today (re: the v1 post) so I just included
the above snippet to demonstrate the cleanup.

Brian

> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
Dave Chinner Jan. 30, 2019, 2:15 a.m. UTC | #5
On Tue, Jan 29, 2019 at 08:05:53PM -0500, Brian Foster wrote:
> On Wed, Jan 30, 2019 at 08:16:55AM +1100, Dave Chinner wrote:
> > On Tue, Jan 29, 2019 at 09:01:36AM -0500, Brian Foster wrote:
> > > On Tue, Jan 29, 2019 at 09:54:26AM +1100, Dave Chinner wrote:
> > > I agree that the magic value itself is a bit obfuscated with this
> > > change, but that's still the case with a lookup table.
> > 
> > The difference with the lookup table is that you know what the magic
> > number is supposed to be by looking at the code that calls it...
> > 
> 
> Indeed. What I didn't realize until later today is that some verifiers
> (xfs_sb_buf_ops, xfs_attr3_leaf_buf_ops, xfs_da3_node_buf_ops) check
> already converted in-core structures and thus actually verify against
> cpu endian magic values. This means said verifiers would require further
> tweaks to either check the underlying buffer, another conversion back to
> disk endian, or we'd otherwise need four of these arrays. :/

That was purely convenience, because we had to convert to the incore
header to check a bunch of other stuff, so the magic number got
converted for free.

I'd prefer if we are going to use a generic method of checking magic
numbers that it does it in on-disk format so that we don't need to
convert just for the magic number check.

> > I'd like all the verifiers to use the same mechanism so we maintain
> > consistency between them.
> > 
> 
> I'd like that too, but I think we need to make some kind of tradeoff or
> compromise to fix this problem given the current, rather ad-hoc nature
> of the verifier code. Some check in-core structs, some don't and may or
> may not use the compile time conversion optimization.

Yup, so let's get them all on to checking the on-disk magic number
before conversion.

> > > --- 8< ---
> > > 
> > > diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
> > > index 1728a3e6f5cf..f602307d2fa0 100644
> > > --- a/fs/xfs/libxfs/xfs_dir2_leaf.c
> > > +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
> > > @@ -142,41 +142,32 @@ xfs_dir3_leaf_check_int(
> > >   */
> > >  static xfs_failaddr_t
> > >  xfs_dir3_leaf_verify(
> > > -	struct xfs_buf		*bp,
> > > -	uint16_t		magic)
> > > +	struct xfs_buf		*bp)
> > >  {
> > >  	struct xfs_mount	*mp = bp->b_target->bt_mount;
> > >  	struct xfs_dir2_leaf	*leaf = bp->b_addr;
> > >  
> > > -	ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
> > > +	if (!xfs_verify_magic(bp, be16_to_cpu(leaf->hdr.info.magic)))
> > > +		return __this_address;
> > >  
> > >  	if (xfs_sb_version_hascrc(&mp->m_sb)) {
> > >  		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
> > > -		uint16_t		magic3;
> > >  
> > > -		magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC
> > > -							 : XFS_DIR3_LEAFN_MAGIC;
> > > -
> > > -		if (leaf3->info.hdr.magic != cpu_to_be16(magic3))
> > > -			return __this_address;
> > > +		ASSERT(leaf3->info.hdr.magic == leaf->hdr.info.magic);
> > >  		if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid))
> > >  			return __this_address;
> > >  		if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
> > >  			return __this_address;
> > >  		if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn)))
> > >  			return __this_address;
> > > -	} else {
> > > -		if (leaf->hdr.info.magic != cpu_to_be16(magic))
> > > -			return __this_address;
> > >  	}
> > >  
> > >  	return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf);
> > >  }
> > 
> > .....
> > 
> > Ok, that removes a lot more existing code than I ever thought it
> > would. If you clean up the macro mess and use encoded magic numbers
> > in the ops structure, then consider my objections removed. :)
> > 
> 
> I'll kill off the macro..
> 
> By encoded, I assume you mean on-disk order(?).

Yup.

> > (And that then leads to factoring of xfs_dablk_info_verify() as dir
> > leaf, danode and attribute leaf blocks all use the same struct
> > xfs_da3_blkinfo header, and now the magic number is abstracted they
> > can use the same code....)
> > 
> 
> Not sure I follow..?

They all do the same thing. Taking your converted code:

	if (!xfs_verify_magic(bp, be16_to_cpu(leaf->hdr.info.magic)))
		return __this_address;

	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;

		ASSERT(leaf3->info.hdr.magic == leaf->hdr.info.magic);
		if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
			return __this_address;
		if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn)))
			return __this_address;
	}

The only things they need are mp, &leaf->hdr, and bp. They don't
actually need to know that it's a dir2/dir3 leaf block now that the
magic number is encoded in bp->b_ops.

i.e. that boiler plate can be factored out of multiple verifiers...

> > Brian, to help prevent stupid people like me wasting your time in
> > future, can you post the entire patch set you have so we can see the
> > same picture you have for the overall change, even if there's only a
> > small chunk you are proposing for merge? That way we'll be able to
> > judge the change on the merits of the entire work, rather than just
> > the small chunk that was posted? 
> > 
> 
> That was the entire patchset at the time. ;) I intentionally made the
> isolated finobt change and posted that to try and get big picture
> feedback before making mechanical changes to the rest of the verifiers.
> I probably had most of the rest done shortly after posting the rfcv2,
> but it wasn't tested until today (re: the v1 post) so I just included
> the above snippet to demonstrate the cleanup.

OK, so somewhat crossed wires while changes were still being made.
Such is life...

Cheers,

Dave.
Brian Foster Jan. 30, 2019, 12:15 p.m. UTC | #6
On Wed, Jan 30, 2019 at 01:15:29PM +1100, Dave Chinner wrote:
> On Tue, Jan 29, 2019 at 08:05:53PM -0500, Brian Foster wrote:
> > On Wed, Jan 30, 2019 at 08:16:55AM +1100, Dave Chinner wrote:
> > > On Tue, Jan 29, 2019 at 09:01:36AM -0500, Brian Foster wrote:
> > > > On Tue, Jan 29, 2019 at 09:54:26AM +1100, Dave Chinner wrote:
> > > > I agree that the magic value itself is a bit obfuscated with this
> > > > change, but that's still the case with a lookup table.
> > > 
> > > The difference with the lookup table is that you know what the magic
> > > number is supposed to be by looking at the code that calls it...
> > > 
> > 
> > Indeed. What I didn't realize until later today is that some verifiers
> > (xfs_sb_buf_ops, xfs_attr3_leaf_buf_ops, xfs_da3_node_buf_ops) check
> > already converted in-core structures and thus actually verify against
> > cpu endian magic values. This means said verifiers would require further
> > tweaks to either check the underlying buffer, another conversion back to
> > disk endian, or we'd otherwise need four of these arrays. :/
> 
> That was purely convenience, because we had to convert to the incore
> header to check a bunch of other stuff, so the magic number got
> converted for free.
> 

I think that applies to the first two cases noted above. The
xfs_da3_node_verify() case is a bit more involved conceptually because
we call out to another indirect function to do the conversion. I think
we can ultimately use hdr for the magic check just the same as the
others because either way the block is headed by an xfs_da_blkinfo; it
just takes some thought to grok from the verifier context (and thus adds
minor maintenance burden if this code changes again down the road). I'll
try to add a comment there..

> I'd prefer if we are going to use a generic method of checking magic
> numbers that it does it in on-disk format so that we don't need to
> convert just for the magic number check.
> 
> > > I'd like all the verifiers to use the same mechanism so we maintain
> > > consistency between them.
> > > 
> > 
> > I'd like that too, but I think we need to make some kind of tradeoff or
> > compromise to fix this problem given the current, rather ad-hoc nature
> > of the verifier code. Some check in-core structs, some don't and may or
> > may not use the compile time conversion optimization.
> 
> Yup, so let's get them all on to checking the on-disk magic number
> before conversion.
> 
> > > > --- 8< ---
> > > > 
> > > > diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
> > > > index 1728a3e6f5cf..f602307d2fa0 100644
> > > > --- a/fs/xfs/libxfs/xfs_dir2_leaf.c
> > > > +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
> > > > @@ -142,41 +142,32 @@ xfs_dir3_leaf_check_int(
> > > >   */
> > > >  static xfs_failaddr_t
> > > >  xfs_dir3_leaf_verify(
> > > > -	struct xfs_buf		*bp,
> > > > -	uint16_t		magic)
> > > > +	struct xfs_buf		*bp)
> > > >  {
> > > >  	struct xfs_mount	*mp = bp->b_target->bt_mount;
> > > >  	struct xfs_dir2_leaf	*leaf = bp->b_addr;
> > > >  
> > > > -	ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
> > > > +	if (!xfs_verify_magic(bp, be16_to_cpu(leaf->hdr.info.magic)))
> > > > +		return __this_address;
> > > >  
> > > >  	if (xfs_sb_version_hascrc(&mp->m_sb)) {
> > > >  		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
> > > > -		uint16_t		magic3;
> > > >  
> > > > -		magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC
> > > > -							 : XFS_DIR3_LEAFN_MAGIC;
> > > > -
> > > > -		if (leaf3->info.hdr.magic != cpu_to_be16(magic3))
> > > > -			return __this_address;
> > > > +		ASSERT(leaf3->info.hdr.magic == leaf->hdr.info.magic);
> > > >  		if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid))
> > > >  			return __this_address;
> > > >  		if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
> > > >  			return __this_address;
> > > >  		if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn)))
> > > >  			return __this_address;
> > > > -	} else {
> > > > -		if (leaf->hdr.info.magic != cpu_to_be16(magic))
> > > > -			return __this_address;
> > > >  	}
> > > >  
> > > >  	return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf);
> > > >  }
> > > 
> > > .....
> > > 
> > > Ok, that removes a lot more existing code than I ever thought it
> > > would. If you clean up the macro mess and use encoded magic numbers
> > > in the ops structure, then consider my objections removed. :)
> > > 
> > 
> > I'll kill off the macro..
> > 
> > By encoded, I assume you mean on-disk order(?).
> 
> Yup.
> 
> > > (And that then leads to factoring of xfs_dablk_info_verify() as dir
> > > leaf, danode and attribute leaf blocks all use the same struct
> > > xfs_da3_blkinfo header, and now the magic number is abstracted they
> > > can use the same code....)
> > > 
> > 
> > Not sure I follow..?
> 
> They all do the same thing. Taking your converted code:
> 
> 	if (!xfs_verify_magic(bp, be16_to_cpu(leaf->hdr.info.magic)))
> 		return __this_address;
> 
> 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
> 		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
> 
> 		ASSERT(leaf3->info.hdr.magic == leaf->hdr.info.magic);
> 		if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid))
> 			return __this_address;
> 		if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
> 			return __this_address;
> 		if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn)))
> 			return __this_address;
> 	}
> 
> The only things they need are mp, &leaf->hdr, and bp. They don't
> actually need to know that it's a dir2/dir3 leaf block now that the
> magic number is encoded in bp->b_ops.
> 
> i.e. that boiler plate can be factored out of multiple verifiers...
> 

Ok, I thought you meant that there were other, existing functions being
shared rather than referring to a subset of the (modified) verifier
code. I'll take a closer look at this after the other fixups.

> > > Brian, to help prevent stupid people like me wasting your time in
> > > future, can you post the entire patch set you have so we can see the
> > > same picture you have for the overall change, even if there's only a
> > > small chunk you are proposing for merge? That way we'll be able to
> > > judge the change on the merits of the entire work, rather than just
> > > the small chunk that was posted? 
> > > 
> > 
> > That was the entire patchset at the time. ;) I intentionally made the
> > isolated finobt change and posted that to try and get big picture
> > feedback before making mechanical changes to the rest of the verifiers.
> > I probably had most of the rest done shortly after posting the rfcv2,
> > but it wasn't tested until today (re: the v1 post) so I just included
> > the above snippet to demonstrate the cleanup.
> 
> OK, so somewhat crossed wires while changes were still being made.
> Such is life...
> 

*nod*

Brian

> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 798269eb4767..c57ecb6b1255 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -260,6 +260,9 @@  xfs_inobt_verify(
 	xfs_failaddr_t		fa;
 	unsigned int		level;
 
+	if (!xfs_verify_magic(bp, block->bb_magic))
+		return __this_address;
+
 	/*
 	 * During growfs operations, we can't verify the exact owner as the
 	 * perag is not fully initialised and hence not attached to the buffer.
@@ -270,18 +273,10 @@  xfs_inobt_verify(
 	 * but beware of the landmine (i.e. need to check pag->pagi_init) if we
 	 * ever do.
 	 */
-	switch (block->bb_magic) {
-	case cpu_to_be32(XFS_IBT_CRC_MAGIC):
-	case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
 		fa = xfs_btree_sblock_v5hdr_verify(bp);
 		if (fa)
 			return fa;
-		/* fall through */
-	case cpu_to_be32(XFS_IBT_MAGIC):
-	case cpu_to_be32(XFS_FIBT_MAGIC):
-		break;
-	default:
-		return __this_address;
 	}
 
 	/* level verification */
@@ -328,6 +323,7 @@  xfs_inobt_write_verify(
 
 const struct xfs_buf_ops xfs_inobt_buf_ops = {
 	.name = "xfs_inobt",
+	.magic = { XFS_IBT_MAGIC, XFS_IBT_CRC_MAGIC },
 	.verify_read = xfs_inobt_read_verify,
 	.verify_write = xfs_inobt_write_verify,
 	.verify_struct = xfs_inobt_verify,
@@ -335,6 +331,7 @@  const struct xfs_buf_ops xfs_inobt_buf_ops = {
 
 const struct xfs_buf_ops xfs_finobt_buf_ops = {
 	.name = "xfs_finobt",
+	.magic = { XFS_FIBT_MAGIC, XFS_FIBT_CRC_MAGIC },
 	.verify_read = xfs_inobt_read_verify,
 	.verify_write = xfs_inobt_write_verify,
 	.verify_struct = xfs_inobt_verify,
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index b9f5511ea998..d8757eafba71 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -125,6 +125,7 @@  struct xfs_buf_map {
 
 struct xfs_buf_ops {
 	char *name;
+	uint32_t magic[2];
 	void (*verify_read)(struct xfs_buf *);
 	void (*verify_write)(struct xfs_buf *);
 	xfs_failaddr_t (*verify_struct)(struct xfs_buf *bp);
@@ -387,4 +388,22 @@  extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int);
 
 int xfs_buf_ensure_ops(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
 
+/*
+ * Verify an on-disk magic value against the magic value specified in the
+ * verifier structure.
+ */
+static inline bool
+xfs_buf_ops_verify_magic(
+	struct xfs_buf		*bp,
+	__be32			dmagic,
+	bool			crc)
+{
+	if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic[crc])))
+		return false;
+	return dmagic == cpu_to_be32(bp->b_ops->magic[crc]);
+}
+#define xfs_verify_magic(bp, dmagic)		\
+	xfs_buf_ops_verify_magic(bp, dmagic,	\
+			xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+
 #endif	/* __XFS_BUF_H__ */