diff mbox series

[7/7] xfs: implement block-metadata based data checksums

Message ID 20250203094322.1809766-8-hch@lst.de (mailing list archive)
State New
Headers show
Series [1/7] block: support integrity generation and verification from file systems | expand

Commit Message

Christoph Hellwig Feb. 3, 2025, 9:43 a.m. UTC
This is a quick hack to demonstrate how data checksumming can be
implemented when it can be stored in the out of line metadata for each
logical block.  It builds on top of the previous PI infrastructure
and instead of generating/verifying protection information it simply
generates and verifies a crc32c checksum and stores it in the non-PI
metadata.  It misses a feature bit in the superblock, checking that
enough size is available in the metadata and many other things.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_data_csum.c | 79 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 76 insertions(+), 3 deletions(-)

Comments

Darrick J. Wong Feb. 3, 2025, 10:20 p.m. UTC | #1
On Mon, Feb 03, 2025 at 10:43:11AM +0100, Christoph Hellwig wrote:
> This is a quick hack to demonstrate how data checksumming can be
> implemented when it can be stored in the out of line metadata for each
> logical block.  It builds on top of the previous PI infrastructure
> and instead of generating/verifying protection information it simply
> generates and verifies a crc32c checksum and stores it in the non-PI

PI can do crc32c now?  I thought it could only do that old crc16 from
like 15 years ago and crc64?  If we try to throw crc32c at a device,
won't it then reject the "incorrect" checksums?  Or is there some other
magic in here where it works and I'm just too out of date to know?

<shrug>

The crc32c generation and validation looks decent though we're
definitely going to want an inode flag so that we're not stuck with
stable page writes.

--D

> metadata.  It misses a feature bit in the superblock, checking that
> enough size is available in the metadata and many other things.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_data_csum.c | 79 ++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 76 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/xfs/xfs_data_csum.c b/fs/xfs/xfs_data_csum.c
> index d9d3620654b1..862388803398 100644
> --- a/fs/xfs/xfs_data_csum.c
> +++ b/fs/xfs/xfs_data_csum.c
> @@ -14,6 +14,73 @@
>  #include <linux/blk-integrity.h>
>  #include <linux/bio-integrity.h>
>  
> +static inline void *xfs_csum_buf(struct bio *bio)
> +{
> +	return bvec_virt(bio_integrity(bio)->bip_vec);
> +}
> +
> +static inline __le32
> +xfs_data_csum(
> +	void			*data,
> +	unsigned int		len)
> +{
> +	return xfs_end_cksum(crc32c(XFS_CRC_SEED, data, len));
> +}
> +
> +static void
> +__xfs_data_csum_generate(
> +	struct bio		*bio)
> +{
> +	unsigned int		ssize = bdev_logical_block_size(bio->bi_bdev);
> +	__le32			*csum_buf = xfs_csum_buf(bio);
> +	struct bvec_iter_all	iter;
> +	struct bio_vec		*bv;
> +	int			c = 0;
> +
> +	bio_for_each_segment_all(bv, bio, iter) {
> +		void		*p;
> +		unsigned int	off;
> +
> +		p = bvec_kmap_local(bv);
> +		for (off = 0; off < bv->bv_len; off += ssize)
> +			csum_buf[c++] = xfs_data_csum(p + off, ssize);
> +		kunmap_local(p);
> +	}
> +}
> +
> +static int
> +__xfs_data_csum_verify(
> +	struct bio		*bio,
> +	struct xfs_inode	*ip,
> +	xfs_off_t		file_offset)
> +{
> +	unsigned int		ssize = bdev_logical_block_size(bio->bi_bdev);
> +	__le32			*csum_buf = xfs_csum_buf(bio);
> +	int			c = 0;
> +	struct bvec_iter_all	iter;
> +	struct bio_vec		*bv;
> +
> +	bio_for_each_segment_all(bv, bio, iter) {
> +		void		*p;
> +		unsigned int	off;
> +
> +		p = bvec_kmap_local(bv);
> +		for (off = 0; off < bv->bv_len; off += ssize) {
> +			if (xfs_data_csum(p + off, ssize) != csum_buf[c++]) {
> +				kunmap_local(p);
> +				xfs_warn(ip->i_mount,
> +"checksum mismatch at inode 0x%llx offset %lld",
> +					ip->i_ino, file_offset);
> +				return -EFSBADCRC;
> +			}
> +			file_offset += ssize;
> +		}
> +		kunmap_local(p);
> +	}
> +
> +	return 0;
> +}
> +
>  void *
>  xfs_data_csum_alloc(
>  	struct bio		*bio)
> @@ -53,11 +120,14 @@ xfs_data_csum_generate(
>  {
>  	struct blk_integrity	*bi = blk_get_integrity(bio->bi_bdev->bd_disk);
>  
> -	if (!bi || !bi->csum_type)
> +	if (!bi)
>  		return;
>  
>  	xfs_data_csum_alloc(bio);
> -	blk_integrity_generate(bio);
> +	if (!bi->csum_type)
> +		__xfs_data_csum_generate(bio);
> +	else
> +		blk_integrity_generate(bio);
>  }
>  
>  int
> @@ -67,7 +137,10 @@ xfs_data_csum_verify(
>  	struct bio		*bio = &ioend->io_bio;
>  	struct blk_integrity	*bi = blk_get_integrity(bio->bi_bdev->bd_disk);
>  
> -	if (!bi || !bi->csum_type)
> +	if (!bi)
>  		return 0;
> +	if (!bi->csum_type)
> +		return __xfs_data_csum_verify(&ioend->io_bio,
> +				XFS_I(ioend->io_inode), ioend->io_offset);
>  	return blk_integrity_verify_all(bio, ioend->io_sector);
>  }
> -- 
> 2.45.2
> 
>
Christoph Hellwig Feb. 4, 2025, 5 a.m. UTC | #2
On Mon, Feb 03, 2025 at 02:20:31PM -0800, Darrick J. Wong wrote:
> On Mon, Feb 03, 2025 at 10:43:11AM +0100, Christoph Hellwig wrote:
> > This is a quick hack to demonstrate how data checksumming can be
> > implemented when it can be stored in the out of line metadata for each
> > logical block.  It builds on top of the previous PI infrastructure
> > and instead of generating/verifying protection information it simply
> > generates and verifies a crc32c checksum and stores it in the non-PI
> 
> PI can do crc32c now?  I thought it could only do that old crc16 from
> like 15 years ago and crc64?

NVMe has a protection information format with a crc32c, but it's not
supported by Linux yet.

> If we try to throw crc32c at a device,
> won't it then reject the "incorrect" checksums?  Or is there some other
> magic in here where it works and I'm just too out of date to know?

This patch implements XFS-level data checksums on devices that implement
non-PI metadata, that is the device allows to store extra data with the
LBA, but doesn't actually interpret and verify it in any way.

> The crc32c generation and validation looks decent though we're
> definitely going to want an inode flag so that we're not stuck with
> stable page writes.

Yeah, support the NOCOW flag, have a sb flag to enable the checksums,
maybe even a field about what checksum to use, yodda, yodda.
Darrick J. Wong Feb. 4, 2025, 6:36 p.m. UTC | #3
On Tue, Feb 04, 2025 at 06:00:25AM +0100, Christoph Hellwig wrote:
> On Mon, Feb 03, 2025 at 02:20:31PM -0800, Darrick J. Wong wrote:
> > On Mon, Feb 03, 2025 at 10:43:11AM +0100, Christoph Hellwig wrote:
> > > This is a quick hack to demonstrate how data checksumming can be
> > > implemented when it can be stored in the out of line metadata for each
> > > logical block.  It builds on top of the previous PI infrastructure
> > > and instead of generating/verifying protection information it simply
> > > generates and verifies a crc32c checksum and stores it in the non-PI
> > 
> > PI can do crc32c now?  I thought it could only do that old crc16 from
> > like 15 years ago and crc64?
> 
> NVMe has a protection information format with a crc32c, but it's not
> supported by Linux yet.

Ah.  Missed that!

> > If we try to throw crc32c at a device,
> > won't it then reject the "incorrect" checksums?  Or is there some other
> > magic in here where it works and I'm just too out of date to know?
> 
> This patch implements XFS-level data checksums on devices that implement
> non-PI metadata, that is the device allows to store extra data with the
> LBA, but doesn't actually interpret and verify it in any way.

Ohhhhh.  So the ondisk metadata /would/ need to capture the checksum
type and which inodes are participating.

> > The crc32c generation and validation looks decent though we're
> > definitely going to want an inode flag so that we're not stuck with
> > stable page writes.
> 
> Yeah, support the NOCOW flag, have a sb flag to enable the checksums,
> maybe even a field about what checksum to use, yodda, yodda.

Why do we need nocow?  Won't the block contents and the PI data get
written in an untorn fashion?

--D
diff mbox series

Patch

diff --git a/fs/xfs/xfs_data_csum.c b/fs/xfs/xfs_data_csum.c
index d9d3620654b1..862388803398 100644
--- a/fs/xfs/xfs_data_csum.c
+++ b/fs/xfs/xfs_data_csum.c
@@ -14,6 +14,73 @@ 
 #include <linux/blk-integrity.h>
 #include <linux/bio-integrity.h>
 
+static inline void *xfs_csum_buf(struct bio *bio)
+{
+	return bvec_virt(bio_integrity(bio)->bip_vec);
+}
+
+static inline __le32
+xfs_data_csum(
+	void			*data,
+	unsigned int		len)
+{
+	return xfs_end_cksum(crc32c(XFS_CRC_SEED, data, len));
+}
+
+static void
+__xfs_data_csum_generate(
+	struct bio		*bio)
+{
+	unsigned int		ssize = bdev_logical_block_size(bio->bi_bdev);
+	__le32			*csum_buf = xfs_csum_buf(bio);
+	struct bvec_iter_all	iter;
+	struct bio_vec		*bv;
+	int			c = 0;
+
+	bio_for_each_segment_all(bv, bio, iter) {
+		void		*p;
+		unsigned int	off;
+
+		p = bvec_kmap_local(bv);
+		for (off = 0; off < bv->bv_len; off += ssize)
+			csum_buf[c++] = xfs_data_csum(p + off, ssize);
+		kunmap_local(p);
+	}
+}
+
+static int
+__xfs_data_csum_verify(
+	struct bio		*bio,
+	struct xfs_inode	*ip,
+	xfs_off_t		file_offset)
+{
+	unsigned int		ssize = bdev_logical_block_size(bio->bi_bdev);
+	__le32			*csum_buf = xfs_csum_buf(bio);
+	int			c = 0;
+	struct bvec_iter_all	iter;
+	struct bio_vec		*bv;
+
+	bio_for_each_segment_all(bv, bio, iter) {
+		void		*p;
+		unsigned int	off;
+
+		p = bvec_kmap_local(bv);
+		for (off = 0; off < bv->bv_len; off += ssize) {
+			if (xfs_data_csum(p + off, ssize) != csum_buf[c++]) {
+				kunmap_local(p);
+				xfs_warn(ip->i_mount,
+"checksum mismatch at inode 0x%llx offset %lld",
+					ip->i_ino, file_offset);
+				return -EFSBADCRC;
+			}
+			file_offset += ssize;
+		}
+		kunmap_local(p);
+	}
+
+	return 0;
+}
+
 void *
 xfs_data_csum_alloc(
 	struct bio		*bio)
@@ -53,11 +120,14 @@  xfs_data_csum_generate(
 {
 	struct blk_integrity	*bi = blk_get_integrity(bio->bi_bdev->bd_disk);
 
-	if (!bi || !bi->csum_type)
+	if (!bi)
 		return;
 
 	xfs_data_csum_alloc(bio);
-	blk_integrity_generate(bio);
+	if (!bi->csum_type)
+		__xfs_data_csum_generate(bio);
+	else
+		blk_integrity_generate(bio);
 }
 
 int
@@ -67,7 +137,10 @@  xfs_data_csum_verify(
 	struct bio		*bio = &ioend->io_bio;
 	struct blk_integrity	*bi = blk_get_integrity(bio->bi_bdev->bd_disk);
 
-	if (!bi || !bi->csum_type)
+	if (!bi)
 		return 0;
+	if (!bi->csum_type)
+		return __xfs_data_csum_verify(&ioend->io_bio,
+				XFS_I(ioend->io_inode), ioend->io_offset);
 	return blk_integrity_verify_all(bio, ioend->io_sector);
 }