diff mbox series

[v3,25/28] xfs: add fs-verity support

Message ID 20231006184922.252188-26-aalbersh@redhat.com (mailing list archive)
State New, archived
Headers show
Series fs-verity support for XFS | expand

Commit Message

Andrey Albershteyn Oct. 6, 2023, 6:49 p.m. UTC
Add integration with fs-verity. The XFS store fs-verity metadata in
the extended attributes. The metadata consist of verity descriptor
and Merkle tree blocks.

The descriptor is stored under "verity_descriptor" extended
attribute. The Merkle tree blocks are stored under binary indexes.

When fs-verity is enabled on an inode, the XFS_IVERITY_CONSTRUCTION
flag is set meaning that the Merkle tree is being build. The
initialization ends with storing of verity descriptor and setting
inode on-disk flag (XFS_DIFLAG2_VERITY).

The verification on read is done in iomap. Based on the inode verity
flag the IOMAP_F_READ_VERITY is set in xfs_read_iomap_begin() to let
iomap know that verification is needed.

Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
---
 fs/xfs/Makefile                 |   1 +
 fs/xfs/libxfs/xfs_attr.c        |  13 ++
 fs/xfs/libxfs/xfs_attr_leaf.c   |  17 ++-
 fs/xfs/libxfs/xfs_attr_remote.c |   8 +-
 fs/xfs/libxfs/xfs_da_format.h   |  16 ++
 fs/xfs/xfs_inode.h              |   3 +-
 fs/xfs/xfs_iomap.c              |   3 +
 fs/xfs/xfs_ondisk.h             |   4 +
 fs/xfs/xfs_super.c              |   8 +
 fs/xfs/xfs_verity.c             | 257 ++++++++++++++++++++++++++++++++
 fs/xfs/xfs_verity.h             |  37 +++++
 11 files changed, 360 insertions(+), 7 deletions(-)
 create mode 100644 fs/xfs/xfs_verity.c
 create mode 100644 fs/xfs/xfs_verity.h

Comments

kernel test robot Oct. 6, 2023, 11:40 p.m. UTC | #1
Hi Andrey,

kernel test robot noticed the following build warnings:

[auto build test WARNING on xfs-linux/for-next]
[also build test WARNING on kdave/for-next tytso-ext4/dev jaegeuk-f2fs/dev-test jaegeuk-f2fs/dev linus/master v6.6-rc4 next-20231006]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Andrey-Albershteyn/xfs-Add-new-name-to-attri-d/20231007-025742
base:   https://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git for-next
patch link:    https://lore.kernel.org/r/20231006184922.252188-26-aalbersh%40redhat.com
patch subject: [PATCH v3 25/28] xfs: add fs-verity support
config: powerpc64-randconfig-002-20231007 (https://download.01.org/0day-ci/archive/20231007/202310070701.WiXRnofP-lkp@intel.com/config)
compiler: powerpc64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231007/202310070701.WiXRnofP-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202310070701.WiXRnofP-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> fs/xfs/xfs_verity.c:165:1: warning: no previous prototype for 'xfs_read_merkle_tree_block' [-Wmissing-prototypes]
     165 | xfs_read_merkle_tree_block(
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~


vim +/xfs_read_merkle_tree_block +165 fs/xfs/xfs_verity.c

   163	
   164	int
 > 165	xfs_read_merkle_tree_block(
   166		struct inode		*inode,
   167		unsigned int		pos,
   168		struct fsverity_block	*block,
   169		unsigned long		num_ra_pages)
   170	{
   171		struct xfs_inode	*ip = XFS_I(inode);
   172		struct xfs_fsverity_merkle_key name;
   173		int			error = 0;
   174		struct xfs_da_args	args = {
   175			.dp		= ip,
   176			.attr_filter	= XFS_ATTR_VERITY,
   177			.namelen	= sizeof(struct xfs_fsverity_merkle_key),
   178		};
   179		xfs_fsverity_merkle_key_to_disk(&name, pos);
   180		args.name = (const uint8_t *)&name.merkleoff;
   181	
   182		error = xfs_attr_get(&args);
   183		if (error)
   184			goto out;
   185	
   186		WARN_ON_ONCE(!args.valuelen);
   187	
   188		/* now we also want to get underlying xfs_buf */
   189		args.op_flags = XFS_DA_OP_BUFFER;
   190		error = xfs_attr_get(&args);
   191		if (error)
   192			goto out;
   193	
   194		block->kaddr = args.value;
   195		block->len = args.valuelen;
   196		block->cached = args.bp->b_flags & XBF_VERITY_CHECKED;
   197		block->context = args.bp;
   198	
   199		return error;
   200	
   201	out:
   202		kmem_free(args.value);
   203		if (args.bp)
   204			xfs_buf_rele(args.bp);
   205		return error;
   206	}
   207
Darrick J. Wong Oct. 11, 2023, 1:39 a.m. UTC | #2
On Fri, Oct 06, 2023 at 08:49:19PM +0200, Andrey Albershteyn wrote:
> Add integration with fs-verity. The XFS store fs-verity metadata in
> the extended attributes. The metadata consist of verity descriptor
> and Merkle tree blocks.
> 
> The descriptor is stored under "verity_descriptor" extended
> attribute. The Merkle tree blocks are stored under binary indexes.
> 
> When fs-verity is enabled on an inode, the XFS_IVERITY_CONSTRUCTION
> flag is set meaning that the Merkle tree is being build. The
> initialization ends with storing of verity descriptor and setting
> inode on-disk flag (XFS_DIFLAG2_VERITY).
> 
> The verification on read is done in iomap. Based on the inode verity
> flag the IOMAP_F_READ_VERITY is set in xfs_read_iomap_begin() to let
> iomap know that verification is needed.
> 
> Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
> ---
>  fs/xfs/Makefile                 |   1 +
>  fs/xfs/libxfs/xfs_attr.c        |  13 ++
>  fs/xfs/libxfs/xfs_attr_leaf.c   |  17 ++-
>  fs/xfs/libxfs/xfs_attr_remote.c |   8 +-
>  fs/xfs/libxfs/xfs_da_format.h   |  16 ++
>  fs/xfs/xfs_inode.h              |   3 +-
>  fs/xfs/xfs_iomap.c              |   3 +
>  fs/xfs/xfs_ondisk.h             |   4 +
>  fs/xfs/xfs_super.c              |   8 +
>  fs/xfs/xfs_verity.c             | 257 ++++++++++++++++++++++++++++++++
>  fs/xfs/xfs_verity.h             |  37 +++++
>  11 files changed, 360 insertions(+), 7 deletions(-)
>  create mode 100644 fs/xfs/xfs_verity.c
>  create mode 100644 fs/xfs/xfs_verity.h
> 
> diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> index 7762c01a85cf..c1a58ed8b419 100644
> --- a/fs/xfs/Makefile
> +++ b/fs/xfs/Makefile
> @@ -130,6 +130,7 @@ xfs-$(CONFIG_XFS_POSIX_ACL)	+= xfs_acl.o
>  xfs-$(CONFIG_SYSCTL)		+= xfs_sysctl.o
>  xfs-$(CONFIG_COMPAT)		+= xfs_ioctl32.o
>  xfs-$(CONFIG_EXPORTFS_BLOCK_OPS)	+= xfs_pnfs.o
> +xfs-$(CONFIG_FS_VERITY)		+= xfs_verity.o
>  
>  # notify failure
>  ifeq ($(CONFIG_MEMORY_FAILURE),y)
> diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
> index 298b74245267..25e1f829e01e 100644
> --- a/fs/xfs/libxfs/xfs_attr.c
> +++ b/fs/xfs/libxfs/xfs_attr.c
> @@ -26,6 +26,7 @@
>  #include "xfs_trace.h"
>  #include "xfs_attr_item.h"
>  #include "xfs_xattr.h"
> +#include "xfs_verity.h"
>  
>  struct kmem_cache		*xfs_attr_intent_cache;
>  
> @@ -1635,6 +1636,18 @@ xfs_attr_namecheck(
>  		return xfs_verify_pptr(mp, (struct xfs_parent_name_rec *)name);
>  	}
>  
> +	if (flags & XFS_ATTR_VERITY) {
> +		/* Merkle tree pages are stored under u64 indexes */
> +		if (length == sizeof(struct xfs_fsverity_merkle_key))
> +			return true;
> +
> +		/* Verity descriptor blocks are held in a named attribute. */
> +		if (length == XFS_VERITY_DESCRIPTOR_NAME_LEN)
> +			return true;
> +
> +		return false;
> +	}
> +
>  	return xfs_str_attr_namecheck(name, length);
>  }
>  
> diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
> index a84795d70de1..36d1f88d972f 100644
> --- a/fs/xfs/libxfs/xfs_attr_leaf.c
> +++ b/fs/xfs/libxfs/xfs_attr_leaf.c
> @@ -29,6 +29,7 @@
>  #include "xfs_log.h"
>  #include "xfs_ag.h"
>  #include "xfs_errortag.h"
> +#include "xfs_verity.h"
>  
>  
>  /*
> @@ -518,7 +519,12 @@ xfs_attr_copy_value(
>  		return -ERANGE;
>  	}
>  
> -	if (!args->value) {
> +	/*
> +	 * We don't want to allocate memory for fs-verity Merkle tree blocks
> +	 * (fs-verity descriptor is fine though). They will be stored in
> +	 * underlying xfs_buf
> +	 */
> +	if (!args->value && !xfs_verity_merkle_block(args)) {

Hmm, why isn't this simply !(args->op_flags & XFS_DA_OP_BUFFER) ?

>  		args->value = kvmalloc(valuelen, GFP_KERNEL | __GFP_NOLOCKDEP);
>  		if (!args->value)
>  			return -ENOMEM;
> @@ -537,7 +543,14 @@ xfs_attr_copy_value(
>  	 */
>  	if (!value)
>  		return -EINVAL;
> -	memcpy(args->value, value, valuelen);
> +	/*
> +	 * We won't copy Merkle tree block to the args->value as we want it be
> +	 * in the xfs_buf. And we didn't allocate any memory in args->value.
> +	 */
> +	if (xfs_verity_merkle_block(args))
> +		args->value = value;
> +	else
> +		memcpy(args->value, value, valuelen);
>  	return 0;
>  }
>  
> diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
> index 7657daf7cff3..7b4424e3454b 100644
> --- a/fs/xfs/libxfs/xfs_attr_remote.c
> +++ b/fs/xfs/libxfs/xfs_attr_remote.c
> @@ -22,6 +22,7 @@
>  #include "xfs_attr_remote.h"
>  #include "xfs_trace.h"
>  #include "xfs_error.h"
> +#include "xfs_verity.h"
>  
>  #define ATTR_RMTVALUE_MAPSIZE	1	/* # of map entries at once */
>  
> @@ -401,11 +402,10 @@ xfs_attr_rmtval_get(
>  	ASSERT(args->rmtvaluelen == args->valuelen);
>  
>  	/*
> -	 * We also check for _OP_BUFFER as we want to trigger on
> -	 * verity blocks only, not on verity_descriptor
> +	 * For fs-verity we want additional space in the xfs_buf. This space is
> +	 * used to copy xattr value without leaf headers (crc header).
>  	 */
> -	if (args->attr_filter & XFS_ATTR_VERITY &&
> -			args->op_flags & XFS_DA_OP_BUFFER)
> +	if (xfs_verity_merkle_block(args))
>  		flags = XBF_DOUBLE_ALLOC;

Hmm, not sure what DOUBLE_ALLOC does, but I'll get there as I go
backwards through the XFS patches...

>  
>  	valuelen = args->rmtvaluelen;
> diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
> index b56bdae83563..a678ad5e4a08 100644
> --- a/fs/xfs/libxfs/xfs_da_format.h
> +++ b/fs/xfs/libxfs/xfs_da_format.h
> @@ -903,4 +903,20 @@ struct xfs_parent_name_irec {
>  	uint8_t			p_namelen;
>  };
>  
> +/*
> + * fs-verity attribute name format
> + *
> + * Merkle tree blocks are stored under extended attributes of the inode. The
> + * name of the attributes are offsets into merkle tree.

Are these offsets byte offsets?

> + */
> +struct xfs_fsverity_merkle_key {
> +	__be64 merkleoff;
> +};
> +
> +static inline void
> +xfs_fsverity_merkle_key_to_disk(struct xfs_fsverity_merkle_key *key, loff_t pos)
> +{
> +	key->merkleoff = cpu_to_be64(pos);
> +}
> +
>  #endif /* __XFS_DA_FORMAT_H__ */
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index 0c5bdb91152e..e6c30a69e8d1 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -342,7 +342,8 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
>   * inactivation completes, both flags will be cleared and the inode is a
>   * plain old IRECLAIMABLE inode.
>   */
> -#define XFS_INACTIVATING	(1 << 13)
> +#define XFS_INACTIVATING		(1 << 13)
> +#define XFS_IVERITY_CONSTRUCTION	(1 << 14) /* merkle tree construction */

Under construction?  Or already built?

>  
>  /* Quotacheck is running but inode has not been added to quota counts. */
>  #define XFS_IQUOTAUNCHECKED	(1 << 14)

These two iflags have the same definition.

> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 18c8f168b153..80b249c42067 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -132,6 +132,9 @@ xfs_bmbt_to_iomap(
>  	    (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
>  		iomap->flags |= IOMAP_F_DIRTY;
>  
> +	if (fsverity_active(VFS_I(ip)))
> +		iomap->flags |= IOMAP_F_READ_VERITY;
> +
>  	iomap->validity_cookie = sequence_cookie;
>  	iomap->folio_ops = &xfs_iomap_folio_ops;
>  	return 0;
> diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
> index c4cc99b70dd3..accbbdeb7624 100644
> --- a/fs/xfs/xfs_ondisk.h
> +++ b/fs/xfs/xfs_ondisk.h
> @@ -190,6 +190,10 @@ xfs_check_ondisk_structs(void)
>  	XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MIN << XFS_DQ_BIGTIME_SHIFT, 4);
>  	XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MAX << XFS_DQ_BIGTIME_SHIFT,
>  			16299260424LL);
> +
> +	/* fs-verity descriptor xattr name */
> +	XFS_CHECK_VALUE(strlen(XFS_VERITY_DESCRIPTOR_NAME),
> +			XFS_VERITY_DESCRIPTOR_NAME_LEN);
>  }
>  
>  #endif /* __XFS_ONDISK_H */
> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> index 6a3b5285044a..f32392add622 100644
> --- a/fs/xfs/xfs_super.c
> +++ b/fs/xfs/xfs_super.c
> @@ -30,6 +30,7 @@
>  #include "xfs_filestream.h"
>  #include "xfs_quota.h"
>  #include "xfs_sysfs.h"
> +#include "xfs_verity.h"
>  #include "xfs_ondisk.h"
>  #include "xfs_rmap_item.h"
>  #include "xfs_refcount_item.h"
> @@ -1526,6 +1527,9 @@ xfs_fs_fill_super(
>  	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
>  #endif
>  	sb->s_op = &xfs_super_operations;
> +#ifdef CONFIG_FS_VERITY
> +	sb->s_vop = &xfs_verity_ops;
> +#endif
>  
>  	/*
>  	 * Delay mount work if the debug hook is set. This is debug
> @@ -1735,6 +1739,10 @@ xfs_fs_fill_super(
>  		goto out_filestream_unmount;
>  	}
>  
> +	if (xfs_has_verity(mp))
> +		xfs_alert(mp,
> +	"EXPERIMENTAL fs-verity feature in use. Use at your own risk!");
> +
>  	error = xfs_mountfs(mp);
>  	if (error)
>  		goto out_filestream_unmount;
> diff --git a/fs/xfs/xfs_verity.c b/fs/xfs/xfs_verity.c
> new file mode 100644
> index 000000000000..a2db56974122
> --- /dev/null
> +++ b/fs/xfs/xfs_verity.c
> @@ -0,0 +1,257 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2023 Red Hat, Inc.
> + */
> +#include "xfs.h"
> +#include "xfs_shared.h"
> +#include "xfs_format.h"
> +#include "xfs_da_format.h"
> +#include "xfs_da_btree.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_mount.h"
> +#include "xfs_inode.h"
> +#include "xfs_attr.h"
> +#include "xfs_verity.h"
> +#include "xfs_bmap_util.h"
> +#include "xfs_log_format.h"
> +#include "xfs_trans.h"
> +
> +static int

Hum, why isn't this ssize_t?  That's what other IO functions return when
returning the number of bytes acted upon.

> +xfs_get_verity_descriptor(
> +	struct inode		*inode,
> +	void			*buf,
> +	size_t			buf_size)
> +{
> +	struct xfs_inode	*ip = XFS_I(inode);
> +	int			error = 0;
> +	struct xfs_da_args	args = {
> +		.dp		= ip,
> +		.attr_filter	= XFS_ATTR_VERITY,
> +		.name		= (const uint8_t *)XFS_VERITY_DESCRIPTOR_NAME,
> +		.namelen	= XFS_VERITY_DESCRIPTOR_NAME_LEN,
> +		.value		= buf,
> +		.valuelen	= buf_size,
> +	};
> +
> +	/*
> +	 * The fact that (returned attribute size) == (provided buf_size) is
> +	 * checked by xfs_attr_copy_value() (returns -ERANGE)
> +	 */
> +	error = xfs_attr_get(&args);
> +	if (error)
> +		return error;
> +
> +	return args.valuelen;
> +}
> +
> +static int
> +xfs_begin_enable_verity(
> +	struct file	    *filp)
> +{
> +	struct inode	    *inode = file_inode(filp);
> +	struct xfs_inode    *ip = XFS_I(inode);
> +	int		    error = 0;
> +
> +	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
> +
> +	if (IS_DAX(inode))
> +		return -EINVAL;
> +
> +	if (xfs_iflags_test_and_set(ip, XFS_IVERITY_CONSTRUCTION))
> +		return -EBUSY;
> +
> +	return error;
> +}
> +
> +static int
> +xfs_drop_merkle_tree(
> +	struct xfs_inode	*ip,
> +	u64			merkle_tree_size,
> +	u8			log_blocksize)
> +{
> +	struct xfs_fsverity_merkle_key	name;
> +	int			error = 0, index;
> +	u64			offset = 0;
> +	struct xfs_da_args	args = {
> +		.dp		= ip,
> +		.whichfork	= XFS_ATTR_FORK,
> +		.attr_filter	= XFS_ATTR_VERITY,
> +		.namelen	= sizeof(struct xfs_fsverity_merkle_key),
> +		/* NULL value make xfs_attr_set remove the attr */
> +		.value		= NULL,
> +	};
> +
> +	for (index = 1; offset < merkle_tree_size; index++) {
> +		xfs_fsverity_merkle_key_to_disk(&name, offset);
> +		args.name = (const uint8_t *)&name.merkleoff;
> +		args.attr_filter = XFS_ATTR_VERITY;

Why do these two args. fields need to be reset every time through the
loop?

> +		error = xfs_attr_set(&args);

Why is it ok to drop the error here?

> +		offset = index << log_blocksize;

Hm, ok, the merkle key offsets /are/ byte offsets.

Isn't this whole loop just:

	args.name = ...;
	args.attr_filter = ...;
	for (offset = 0; offset < merkle_tree_size; offset++) {
		xfs_fsverity_merkle_key_to_disk(&name, pos << log_blocksize);
		error = xfs_attr_set(&args);
		if (error)
			return error;
	}

> +	}
> +
> +	args.name = (const uint8_t *)XFS_VERITY_DESCRIPTOR_NAME;
> +	args.namelen = XFS_VERITY_DESCRIPTOR_NAME_LEN;
> +	args.attr_filter = XFS_ATTR_VERITY;
> +	error = xfs_attr_set(&args);
> +
> +	return error;
> +}
> +
> +static int
> +xfs_end_enable_verity(
> +	struct file		*filp,
> +	const void		*desc,
> +	size_t			desc_size,
> +	u64			merkle_tree_size,
> +	u8			log_blocksize)
> +{
> +	struct inode		*inode = file_inode(filp);
> +	struct xfs_inode	*ip = XFS_I(inode);
> +	struct xfs_mount	*mp = ip->i_mount;
> +	struct xfs_trans	*tp;
> +	struct xfs_da_args	args = {
> +		.dp		= ip,
> +		.whichfork	= XFS_ATTR_FORK,
> +		.attr_filter	= XFS_ATTR_VERITY,
> +		.attr_flags	= XATTR_CREATE,
> +		.name		= (const uint8_t *)XFS_VERITY_DESCRIPTOR_NAME,
> +		.namelen	= XFS_VERITY_DESCRIPTOR_NAME_LEN,
> +		.value		= (void *)desc,
> +		.valuelen	= desc_size,
> +	};
> +	int			error = 0;
> +
> +	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
> +
> +	/* fs-verity failed, just cleanup */
> +	if (desc == NULL)
> +		goto out;
> +
> +	error = xfs_attr_set(&args);
> +	if (error)
> +		goto out;
> +
> +	/* Set fsverity inode flag */
> +	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_ichange,
> +			0, 0, false, &tp);
> +	if (error)
> +		goto out;
> +
> +	/*
> +	 * Ensure that we've persisted the verity information before we enable
> +	 * it on the inode and tell the caller we have sealed the inode.
> +	 */
> +	ip->i_diflags2 |= XFS_DIFLAG2_VERITY;
> +
> +	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
> +	xfs_trans_set_sync(tp);
> +
> +	error = xfs_trans_commit(tp);
> +	xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +
> +	if (!error)
> +		inode->i_flags |= S_VERITY;
> +
> +out:
> +	if (error)
> +		WARN_ON_ONCE(xfs_drop_merkle_tree(ip, merkle_tree_size,
> +						  log_blocksize));

(Don't WARNings panic some kernels?)

> +
> +	xfs_iflags_clear(ip, XFS_IVERITY_CONSTRUCTION);
> +	return error;
> +}
> +
> +int
> +xfs_read_merkle_tree_block(
> +	struct inode		*inode,
> +	unsigned int		pos,
> +	struct fsverity_block	*block,
> +	unsigned long		num_ra_pages)
> +{
> +	struct xfs_inode	*ip = XFS_I(inode);
> +	struct xfs_fsverity_merkle_key name;
> +	int			error = 0;
> +	struct xfs_da_args	args = {
> +		.dp		= ip,
> +		.attr_filter	= XFS_ATTR_VERITY,
> +		.namelen	= sizeof(struct xfs_fsverity_merkle_key),
> +	};
> +	xfs_fsverity_merkle_key_to_disk(&name, pos);
> +	args.name = (const uint8_t *)&name.merkleoff;
> +
> +	error = xfs_attr_get(&args);
> +	if (error)
> +		goto out;
> +
> +	WARN_ON_ONCE(!args.valuelen);
> +
> +	/* now we also want to get underlying xfs_buf */
> +	args.op_flags = XFS_DA_OP_BUFFER;

If XFS_DA_OP_BUFFER returns the (bhold'd) xfs_buf containing the value,
then ... can't we call xfs_attr_get once?

Can the xfs_buf contents change after we drop the I{,O}LOCK?

Can users (or the kernel) change or add xattrs on a verity file?

Are they allowed to move the file, and hence update the parent pointers?

Or is the point of XBF_DOUBLE_ALLOC that we'll snapshot the attr data
into the second half of the buffer, and that's what gets passed to
fsverity core code?

> +	error = xfs_attr_get(&args);
> +	if (error)
> +		goto out;
> +
> +	block->kaddr = args.value;
> +	block->len = args.valuelen;
> +	block->cached = args.bp->b_flags & XBF_VERITY_CHECKED;
> +	block->context = args.bp;
> +
> +	return error;
> +
> +out:
> +	kmem_free(args.value);
> +	if (args.bp)
> +		xfs_buf_rele(args.bp);
> +	return error;
> +}
> +
> +static int
> +xfs_write_merkle_tree_block(
> +	struct inode		*inode,
> +	const void		*buf,
> +	u64			pos,
> +	unsigned int		size)
> +{
> +	struct xfs_inode	*ip = XFS_I(inode);
> +	struct xfs_fsverity_merkle_key	name;
> +	struct xfs_da_args	args = {
> +		.dp		= ip,
> +		.whichfork	= XFS_ATTR_FORK,
> +		.attr_filter	= XFS_ATTR_VERITY,
> +		.attr_flags	= XATTR_CREATE,

What happens if merkle tree fails midway through writing the blobs to
the xattr tree?  If they're not removed, then won't XATTR_CREATE here
cause a second attempt to fail because there's a half-built tree?

> +		.namelen	= sizeof(struct xfs_fsverity_merkle_key),
> +		.value		= (void *)buf,
> +		.valuelen	= size,
> +	};
> +
> +	xfs_fsverity_merkle_key_to_disk(&name, pos);
> +	args.name = (const uint8_t *)&name.merkleoff;
> +
> +	return xfs_attr_set(&args);
> +}
> +
> +static void
> +xfs_drop_block(
> +	struct fsverity_block	*block)
> +{
> +	struct xfs_buf		*buf;
> +
> +	ASSERT(block != NULL);
> +
> +	buf = (struct xfs_buf *)block->context;
> +
> +	if (block->cached)
> +		buf->b_flags |= XBF_VERITY_CHECKED;
> +	xfs_buf_rele(buf);
> +
> +	kunmap_local(block->kaddr);
> +}
> +
> +const struct fsverity_operations xfs_verity_ops = {
> +	.begin_enable_verity		= &xfs_begin_enable_verity,
> +	.end_enable_verity		= &xfs_end_enable_verity,
> +	.get_verity_descriptor		= &xfs_get_verity_descriptor,
> +	.read_merkle_tree_block		= &xfs_read_merkle_tree_block,
> +	.write_merkle_tree_block	= &xfs_write_merkle_tree_block,
> +	.drop_block			= &xfs_drop_block,
> +};
> diff --git a/fs/xfs/xfs_verity.h b/fs/xfs/xfs_verity.h
> new file mode 100644
> index 000000000000..0f32fd212091
> --- /dev/null
> +++ b/fs/xfs/xfs_verity.h
> @@ -0,0 +1,37 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2022 Red Hat, Inc.
> + */
> +#ifndef __XFS_VERITY_H__
> +#define __XFS_VERITY_H__
> +
> +#include "xfs.h"
> +#include "xfs_da_format.h"
> +#include "xfs_da_btree.h"
> +#include <linux/fsverity.h>
> +
> +#define XFS_VERITY_DESCRIPTOR_NAME "verity_descriptor"
> +#define XFS_VERITY_DESCRIPTOR_NAME_LEN 17

Want to shorten this by one for u64 alignment? ;)

> +
> +static inline bool
> +xfs_verity_merkle_block(
> +		struct xfs_da_args *args)
> +{
> +	if (!(args->attr_filter & XFS_ATTR_VERITY))
> +		return false;
> +
> +	if (!(args->op_flags & XFS_DA_OP_BUFFER))
> +		return false;
> +
> +	if (args->valuelen < 1024 || args->valuelen > PAGE_SIZE ||
> +			!is_power_of_2(args->valuelen))
> +		return false;

Why do we check the valuelen here?

Also, if you're passing the xfs_buf out, I thought the buffer cache
could handle weird sizes?

> +
> +	return true;
> +}
> +
> +#ifdef CONFIG_FS_VERITY
> +extern const struct fsverity_operations xfs_verity_ops;
> +#endif	/* CONFIG_FS_VERITY */
> +
> +#endif	/* __XFS_VERITY_H__ */
> -- 
> 2.40.1
>
Andrey Albershteyn Oct. 11, 2023, 2:36 p.m. UTC | #3
On 2023-10-10 18:39:40, Darrick J. Wong wrote:
> On Fri, Oct 06, 2023 at 08:49:19PM +0200, Andrey Albershteyn wrote:
> > -	if (!args->value) {
> > +	/*
> > +	 * We don't want to allocate memory for fs-verity Merkle tree blocks
> > +	 * (fs-verity descriptor is fine though). They will be stored in
> > +	 * underlying xfs_buf
> > +	 */
> > +	if (!args->value && !xfs_verity_merkle_block(args)) {
> 
> Hmm, why isn't this simply !(args->op_flags & XFS_DA_OP_BUFFER) ?

I check if args contains fs-verity block in multiple places so I
wrapped it into a function. Also, XFS_DA_OP_BUFFER only sets
args->bp. It doesn't affect where xfs_attr_copy_value copies the
value (this is changed with XBF_DOUBLE_ALLOC).

> > -	if (args->attr_filter & XFS_ATTR_VERITY &&
> > -			args->op_flags & XFS_DA_OP_BUFFER)
> > +	if (xfs_verity_merkle_block(args))
> >  		flags = XBF_DOUBLE_ALLOC;
> 
> Hmm, not sure what DOUBLE_ALLOC does, but I'll get there as I go
> backwards through the XFS patches...
> 

> > +/*
> > + * fs-verity attribute name format
> > + *
> > + * Merkle tree blocks are stored under extended attributes of the inode. The
> > + * name of the attributes are offsets into merkle tree.
> 
> Are these offsets byte offsets?
> 

Byte offsets.

> > +++ b/fs/xfs/xfs_inode.h
> > @@ -342,7 +342,8 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
> >   * inactivation completes, both flags will be cleared and the inode is a
> >   * plain old IRECLAIMABLE inode.
> >   */
> > -#define XFS_INACTIVATING	(1 << 13)
> > +#define XFS_INACTIVATING		(1 << 13)
> > +#define XFS_IVERITY_CONSTRUCTION	(1 << 14) /* merkle tree construction */
> 
> Under construction?  Or already built?

Under construction.

> >  
> >  /* Quotacheck is running but inode has not been added to quota counts. */
> >  #define XFS_IQUOTAUNCHECKED	(1 << 14)
> 
> These two iflags have the same definition.

Oops! Don't know how did I missed that, thanks!

> > diff --git a/fs/xfs/xfs_verity.c b/fs/xfs/xfs_verity.c
> > new file mode 100644
> > index 000000000000..a2db56974122
> > --- /dev/null
> > +++ b/fs/xfs/xfs_verity.c
> > @@ -0,0 +1,257 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (C) 2023 Red Hat, Inc.
> > + */
> > +#include "xfs.h"
> > +#include "xfs_shared.h"
> > +#include "xfs_format.h"
> > +#include "xfs_da_format.h"
> > +#include "xfs_da_btree.h"
> > +#include "xfs_trans_resv.h"
> > +#include "xfs_mount.h"
> > +#include "xfs_inode.h"
> > +#include "xfs_attr.h"
> > +#include "xfs_verity.h"
> > +#include "xfs_bmap_util.h"
> > +#include "xfs_log_format.h"
> > +#include "xfs_trans.h"
> > +
> > +static int
> 
> Hum, why isn't this ssize_t?  That's what other IO functions return when
> returning the number of bytes acted upon.

The fs/verity prototype is with int, so I left it as int.

> > +static int
> > +xfs_drop_merkle_tree(
> > +	struct xfs_inode	*ip,
> > +	u64			merkle_tree_size,
> > +	u8			log_blocksize)
> > +{
> > +	struct xfs_fsverity_merkle_key	name;
> > +	int			error = 0, index;
> > +	u64			offset = 0;
> > +	struct xfs_da_args	args = {
> > +		.dp		= ip,
> > +		.whichfork	= XFS_ATTR_FORK,
> > +		.attr_filter	= XFS_ATTR_VERITY,
> > +		.namelen	= sizeof(struct xfs_fsverity_merkle_key),
> > +		/* NULL value make xfs_attr_set remove the attr */
> > +		.value		= NULL,
> > +	};
> > +
> > +	for (index = 1; offset < merkle_tree_size; index++) {
> > +		xfs_fsverity_merkle_key_to_disk(&name, offset);
> > +		args.name = (const uint8_t *)&name.merkleoff;
> > +		args.attr_filter = XFS_ATTR_VERITY;
> 
> Why do these two args. fields need to be reset every time through the
> loop?

Oh, yeah this should be ok. I will check it.

> > +		error = xfs_attr_set(&args);
> 
> Why is it ok to drop the error here?
> 

My bad, will handle it.

> > +		offset = index << log_blocksize;
> 
> Hm, ok, the merkle key offsets /are/ byte offsets.
> 
> Isn't this whole loop just:
> 
> 	args.name = ...;
> 	args.attr_filter = ...;
> 	for (offset = 0; offset < merkle_tree_size; offset++) {
> 		xfs_fsverity_merkle_key_to_disk(&name, pos << log_blocksize);
> 		error = xfs_attr_set(&args);
> 		if (error)
> 			return error;
> 	}
> 
> > +static int
> > +xfs_end_enable_verity(
> > +	struct file		*filp,
> > +	const void		*desc,
> > +	size_t			desc_size,
> > +	u64			merkle_tree_size,
> > +	u8			log_blocksize)
> > +{
> > +	struct inode		*inode = file_inode(filp);
> > +	struct xfs_inode	*ip = XFS_I(inode);
> > +	struct xfs_mount	*mp = ip->i_mount;
> > +	struct xfs_trans	*tp;
> > +	struct xfs_da_args	args = {
> > +		.dp		= ip,
> > +		.whichfork	= XFS_ATTR_FORK,
> > +		.attr_filter	= XFS_ATTR_VERITY,
> > +		.attr_flags	= XATTR_CREATE,
> > +		.name		= (const uint8_t *)XFS_VERITY_DESCRIPTOR_NAME,
> > +		.namelen	= XFS_VERITY_DESCRIPTOR_NAME_LEN,
> > +		.value		= (void *)desc,
> > +		.valuelen	= desc_size,
> > +	};
> > +	int			error = 0;
> > +
> > +	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
> > +
> > +	/* fs-verity failed, just cleanup */
> > +	if (desc == NULL)
> > +		goto out;
> > +
> > +	error = xfs_attr_set(&args);
> > +	if (error)
> > +		goto out;
> > +
> > +	/* Set fsverity inode flag */
> > +	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_ichange,
> > +			0, 0, false, &tp);
> > +	if (error)
> > +		goto out;
> > +
> > +	/*
> > +	 * Ensure that we've persisted the verity information before we enable
> > +	 * it on the inode and tell the caller we have sealed the inode.
> > +	 */
> > +	ip->i_diflags2 |= XFS_DIFLAG2_VERITY;
> > +
> > +	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
> > +	xfs_trans_set_sync(tp);
> > +
> > +	error = xfs_trans_commit(tp);
> > +	xfs_iunlock(ip, XFS_ILOCK_EXCL);
> > +
> > +	if (!error)
> > +		inode->i_flags |= S_VERITY;
> > +
> > +out:
> > +	if (error)
> > +		WARN_ON_ONCE(xfs_drop_merkle_tree(ip, merkle_tree_size,
> > +						  log_blocksize));
> 
> (Don't WARNings panic some kernels?)

Not sure, but in case we fail to drop a tree (error already
happened) we probably want to know that something going on and tree
can not be removed. But I'm fine with removing this, not sure what
are the guidelines on WARNinigs.

> > +int
> > +xfs_read_merkle_tree_block(
> > +	struct inode		*inode,
> > +	unsigned int		pos,
> > +	struct fsverity_block	*block,
> > +	unsigned long		num_ra_pages)
> > +{
> > +	struct xfs_inode	*ip = XFS_I(inode);
> > +	struct xfs_fsverity_merkle_key name;
> > +	int			error = 0;
> > +	struct xfs_da_args	args = {
> > +		.dp		= ip,
> > +		.attr_filter	= XFS_ATTR_VERITY,
> > +		.namelen	= sizeof(struct xfs_fsverity_merkle_key),
> > +	};
> > +	xfs_fsverity_merkle_key_to_disk(&name, pos);
> > +	args.name = (const uint8_t *)&name.merkleoff;
> > +
> > +	error = xfs_attr_get(&args);
> > +	if (error)
> > +		goto out;
> > +
> > +	WARN_ON_ONCE(!args.valuelen);
> > +
> > +	/* now we also want to get underlying xfs_buf */
> > +	args.op_flags = XFS_DA_OP_BUFFER;
> 
> If XFS_DA_OP_BUFFER returns the (bhold'd) xfs_buf containing the value,
> then ... can't we call xfs_attr_get once?

hmm, yeah, one call would be probably enough. The initial two calls
were here because fs/verity expected an error if attribute doesn't
exist. But with change to fs/verity/verify.c in patch 10 it's
probably fine to do everything just in one call. Thanks!

> Can the xfs_buf contents change after we drop the I{,O}LOCK?

By fs-verity? No

> Can users (or the kernel) change or add xattrs on a verity file?
>
> Are they allowed to move the file, and hence update the parent pointers?

Moving is probably fine but I haven't tested if it works fine with
parent pointers. But as they are xattrs I think it shouldn't be a
problem. I will check it.

> Or is the point of XBF_DOUBLE_ALLOC that we'll snapshot the attr data
> into the second half of the buffer, and that's what gets passed to
> fsverity core code?

Yes, this

> > +static int
> > +xfs_write_merkle_tree_block(
> > +	struct inode		*inode,
> > +	const void		*buf,
> > +	u64			pos,
> > +	unsigned int		size)
> > +{
> > +	struct xfs_inode	*ip = XFS_I(inode);
> > +	struct xfs_fsverity_merkle_key	name;
> > +	struct xfs_da_args	args = {
> > +		.dp		= ip,
> > +		.whichfork	= XFS_ATTR_FORK,
> > +		.attr_filter	= XFS_ATTR_VERITY,
> > +		.attr_flags	= XATTR_CREATE,
> 
> What happens if merkle tree fails midway through writing the blobs to
> the xattr tree?  If they're not removed, then won't XATTR_CREATE here
> cause a second attempt to fail because there's a half-built tree?

If write fails during tree building fs-verity calls
xfs_end_enable_verity() which then goes into xfs_drop_merkle_tree()
(if(desc == NULL) case).

> > diff --git a/fs/xfs/xfs_verity.h b/fs/xfs/xfs_verity.h
> > new file mode 100644
> > index 000000000000..0f32fd212091
> > --- /dev/null
> > +++ b/fs/xfs/xfs_verity.h
> > @@ -0,0 +1,37 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (C) 2022 Red Hat, Inc.
> > + */
> > +#ifndef __XFS_VERITY_H__
> > +#define __XFS_VERITY_H__
> > +
> > +#include "xfs.h"
> > +#include "xfs_da_format.h"
> > +#include "xfs_da_btree.h"
> > +#include <linux/fsverity.h>
> > +
> > +#define XFS_VERITY_DESCRIPTOR_NAME "verity_descriptor"
> > +#define XFS_VERITY_DESCRIPTOR_NAME_LEN 17
> 
> Want to shorten this by one for u64 alignment? ;)

sure :)

> > +static inline bool
> > +xfs_verity_merkle_block(
> > +		struct xfs_da_args *args)
> > +{
> > +	if (!(args->attr_filter & XFS_ATTR_VERITY))
> > +		return false;
> > +
> > +	if (!(args->op_flags & XFS_DA_OP_BUFFER))
> > +		return false;
> > +
> > +	if (args->valuelen < 1024 || args->valuelen > PAGE_SIZE ||
> > +			!is_power_of_2(args->valuelen))
> > +		return false;
> 
> Why do we check the valuelen here?

I use this function in multiple places to distinguish between
xfs_da_args which contains:
- fs-verity block
- fs-verity descriptor/any other attribute

Check for size is not necessary.

But this is one of the requirements on fs-verity blocks so I thought
it could be good candidate for one additional check. I'm fine with
removing this

> Also, if you're passing the xfs_buf out, I thought the buffer cache
> could handle weird sizes?

Not sure what you mean here
kernel test robot Oct. 18, 2023, 7:18 p.m. UTC | #4
Hi Andrey,

kernel test robot noticed the following build warnings:

[auto build test WARNING on xfs-linux/for-next]
[also build test WARNING on kdave/for-next tytso-ext4/dev jaegeuk-f2fs/dev-test jaegeuk-f2fs/dev linus/master v6.6-rc6 next-20231018]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Andrey-Albershteyn/xfs-Add-new-name-to-attri-d/20231007-025742
base:   https://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git for-next
patch link:    https://lore.kernel.org/r/20231006184922.252188-26-aalbersh%40redhat.com
patch subject: [PATCH v3 25/28] xfs: add fs-verity support
config: i386-randconfig-063-20231018 (https://download.01.org/0day-ci/archive/20231019/202310190201.brxlwE23-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-12) 11.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231019/202310190201.brxlwE23-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202310190201.brxlwE23-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
>> fs/xfs/xfs_verity.c:165:1: sparse: sparse: symbol 'xfs_read_merkle_tree_block' was not declared. Should it be static?
diff mbox series

Patch

diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 7762c01a85cf..c1a58ed8b419 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -130,6 +130,7 @@  xfs-$(CONFIG_XFS_POSIX_ACL)	+= xfs_acl.o
 xfs-$(CONFIG_SYSCTL)		+= xfs_sysctl.o
 xfs-$(CONFIG_COMPAT)		+= xfs_ioctl32.o
 xfs-$(CONFIG_EXPORTFS_BLOCK_OPS)	+= xfs_pnfs.o
+xfs-$(CONFIG_FS_VERITY)		+= xfs_verity.o
 
 # notify failure
 ifeq ($(CONFIG_MEMORY_FAILURE),y)
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 298b74245267..25e1f829e01e 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -26,6 +26,7 @@ 
 #include "xfs_trace.h"
 #include "xfs_attr_item.h"
 #include "xfs_xattr.h"
+#include "xfs_verity.h"
 
 struct kmem_cache		*xfs_attr_intent_cache;
 
@@ -1635,6 +1636,18 @@  xfs_attr_namecheck(
 		return xfs_verify_pptr(mp, (struct xfs_parent_name_rec *)name);
 	}
 
+	if (flags & XFS_ATTR_VERITY) {
+		/* Merkle tree pages are stored under u64 indexes */
+		if (length == sizeof(struct xfs_fsverity_merkle_key))
+			return true;
+
+		/* Verity descriptor blocks are held in a named attribute. */
+		if (length == XFS_VERITY_DESCRIPTOR_NAME_LEN)
+			return true;
+
+		return false;
+	}
+
 	return xfs_str_attr_namecheck(name, length);
 }
 
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index a84795d70de1..36d1f88d972f 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -29,6 +29,7 @@ 
 #include "xfs_log.h"
 #include "xfs_ag.h"
 #include "xfs_errortag.h"
+#include "xfs_verity.h"
 
 
 /*
@@ -518,7 +519,12 @@  xfs_attr_copy_value(
 		return -ERANGE;
 	}
 
-	if (!args->value) {
+	/*
+	 * We don't want to allocate memory for fs-verity Merkle tree blocks
+	 * (fs-verity descriptor is fine though). They will be stored in
+	 * underlying xfs_buf
+	 */
+	if (!args->value && !xfs_verity_merkle_block(args)) {
 		args->value = kvmalloc(valuelen, GFP_KERNEL | __GFP_NOLOCKDEP);
 		if (!args->value)
 			return -ENOMEM;
@@ -537,7 +543,14 @@  xfs_attr_copy_value(
 	 */
 	if (!value)
 		return -EINVAL;
-	memcpy(args->value, value, valuelen);
+	/*
+	 * We won't copy Merkle tree block to the args->value as we want it be
+	 * in the xfs_buf. And we didn't allocate any memory in args->value.
+	 */
+	if (xfs_verity_merkle_block(args))
+		args->value = value;
+	else
+		memcpy(args->value, value, valuelen);
 	return 0;
 }
 
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 7657daf7cff3..7b4424e3454b 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -22,6 +22,7 @@ 
 #include "xfs_attr_remote.h"
 #include "xfs_trace.h"
 #include "xfs_error.h"
+#include "xfs_verity.h"
 
 #define ATTR_RMTVALUE_MAPSIZE	1	/* # of map entries at once */
 
@@ -401,11 +402,10 @@  xfs_attr_rmtval_get(
 	ASSERT(args->rmtvaluelen == args->valuelen);
 
 	/*
-	 * We also check for _OP_BUFFER as we want to trigger on
-	 * verity blocks only, not on verity_descriptor
+	 * For fs-verity we want additional space in the xfs_buf. This space is
+	 * used to copy xattr value without leaf headers (crc header).
 	 */
-	if (args->attr_filter & XFS_ATTR_VERITY &&
-			args->op_flags & XFS_DA_OP_BUFFER)
+	if (xfs_verity_merkle_block(args))
 		flags = XBF_DOUBLE_ALLOC;
 
 	valuelen = args->rmtvaluelen;
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index b56bdae83563..a678ad5e4a08 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -903,4 +903,20 @@  struct xfs_parent_name_irec {
 	uint8_t			p_namelen;
 };
 
+/*
+ * fs-verity attribute name format
+ *
+ * Merkle tree blocks are stored under extended attributes of the inode. The
+ * name of the attributes are offsets into merkle tree.
+ */
+struct xfs_fsverity_merkle_key {
+	__be64 merkleoff;
+};
+
+static inline void
+xfs_fsverity_merkle_key_to_disk(struct xfs_fsverity_merkle_key *key, loff_t pos)
+{
+	key->merkleoff = cpu_to_be64(pos);
+}
+
 #endif /* __XFS_DA_FORMAT_H__ */
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 0c5bdb91152e..e6c30a69e8d1 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -342,7 +342,8 @@  static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
  * inactivation completes, both flags will be cleared and the inode is a
  * plain old IRECLAIMABLE inode.
  */
-#define XFS_INACTIVATING	(1 << 13)
+#define XFS_INACTIVATING		(1 << 13)
+#define XFS_IVERITY_CONSTRUCTION	(1 << 14) /* merkle tree construction */
 
 /* Quotacheck is running but inode has not been added to quota counts. */
 #define XFS_IQUOTAUNCHECKED	(1 << 14)
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 18c8f168b153..80b249c42067 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -132,6 +132,9 @@  xfs_bmbt_to_iomap(
 	    (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
 		iomap->flags |= IOMAP_F_DIRTY;
 
+	if (fsverity_active(VFS_I(ip)))
+		iomap->flags |= IOMAP_F_READ_VERITY;
+
 	iomap->validity_cookie = sequence_cookie;
 	iomap->folio_ops = &xfs_iomap_folio_ops;
 	return 0;
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index c4cc99b70dd3..accbbdeb7624 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -190,6 +190,10 @@  xfs_check_ondisk_structs(void)
 	XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MIN << XFS_DQ_BIGTIME_SHIFT, 4);
 	XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MAX << XFS_DQ_BIGTIME_SHIFT,
 			16299260424LL);
+
+	/* fs-verity descriptor xattr name */
+	XFS_CHECK_VALUE(strlen(XFS_VERITY_DESCRIPTOR_NAME),
+			XFS_VERITY_DESCRIPTOR_NAME_LEN);
 }
 
 #endif /* __XFS_ONDISK_H */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 6a3b5285044a..f32392add622 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -30,6 +30,7 @@ 
 #include "xfs_filestream.h"
 #include "xfs_quota.h"
 #include "xfs_sysfs.h"
+#include "xfs_verity.h"
 #include "xfs_ondisk.h"
 #include "xfs_rmap_item.h"
 #include "xfs_refcount_item.h"
@@ -1526,6 +1527,9 @@  xfs_fs_fill_super(
 	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
 #endif
 	sb->s_op = &xfs_super_operations;
+#ifdef CONFIG_FS_VERITY
+	sb->s_vop = &xfs_verity_ops;
+#endif
 
 	/*
 	 * Delay mount work if the debug hook is set. This is debug
@@ -1735,6 +1739,10 @@  xfs_fs_fill_super(
 		goto out_filestream_unmount;
 	}
 
+	if (xfs_has_verity(mp))
+		xfs_alert(mp,
+	"EXPERIMENTAL fs-verity feature in use. Use at your own risk!");
+
 	error = xfs_mountfs(mp);
 	if (error)
 		goto out_filestream_unmount;
diff --git a/fs/xfs/xfs_verity.c b/fs/xfs/xfs_verity.c
new file mode 100644
index 000000000000..a2db56974122
--- /dev/null
+++ b/fs/xfs/xfs_verity.c
@@ -0,0 +1,257 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Red Hat, Inc.
+ */
+#include "xfs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_attr.h"
+#include "xfs_verity.h"
+#include "xfs_bmap_util.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+
+static int
+xfs_get_verity_descriptor(
+	struct inode		*inode,
+	void			*buf,
+	size_t			buf_size)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	int			error = 0;
+	struct xfs_da_args	args = {
+		.dp		= ip,
+		.attr_filter	= XFS_ATTR_VERITY,
+		.name		= (const uint8_t *)XFS_VERITY_DESCRIPTOR_NAME,
+		.namelen	= XFS_VERITY_DESCRIPTOR_NAME_LEN,
+		.value		= buf,
+		.valuelen	= buf_size,
+	};
+
+	/*
+	 * The fact that (returned attribute size) == (provided buf_size) is
+	 * checked by xfs_attr_copy_value() (returns -ERANGE)
+	 */
+	error = xfs_attr_get(&args);
+	if (error)
+		return error;
+
+	return args.valuelen;
+}
+
+static int
+xfs_begin_enable_verity(
+	struct file	    *filp)
+{
+	struct inode	    *inode = file_inode(filp);
+	struct xfs_inode    *ip = XFS_I(inode);
+	int		    error = 0;
+
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+
+	if (IS_DAX(inode))
+		return -EINVAL;
+
+	if (xfs_iflags_test_and_set(ip, XFS_IVERITY_CONSTRUCTION))
+		return -EBUSY;
+
+	return error;
+}
+
+static int
+xfs_drop_merkle_tree(
+	struct xfs_inode	*ip,
+	u64			merkle_tree_size,
+	u8			log_blocksize)
+{
+	struct xfs_fsverity_merkle_key	name;
+	int			error = 0, index;
+	u64			offset = 0;
+	struct xfs_da_args	args = {
+		.dp		= ip,
+		.whichfork	= XFS_ATTR_FORK,
+		.attr_filter	= XFS_ATTR_VERITY,
+		.namelen	= sizeof(struct xfs_fsverity_merkle_key),
+		/* NULL value make xfs_attr_set remove the attr */
+		.value		= NULL,
+	};
+
+	for (index = 1; offset < merkle_tree_size; index++) {
+		xfs_fsverity_merkle_key_to_disk(&name, offset);
+		args.name = (const uint8_t *)&name.merkleoff;
+		args.attr_filter = XFS_ATTR_VERITY;
+		error = xfs_attr_set(&args);
+		offset = index << log_blocksize;
+	}
+
+	args.name = (const uint8_t *)XFS_VERITY_DESCRIPTOR_NAME;
+	args.namelen = XFS_VERITY_DESCRIPTOR_NAME_LEN;
+	args.attr_filter = XFS_ATTR_VERITY;
+	error = xfs_attr_set(&args);
+
+	return error;
+}
+
+static int
+xfs_end_enable_verity(
+	struct file		*filp,
+	const void		*desc,
+	size_t			desc_size,
+	u64			merkle_tree_size,
+	u8			log_blocksize)
+{
+	struct inode		*inode = file_inode(filp);
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	struct xfs_da_args	args = {
+		.dp		= ip,
+		.whichfork	= XFS_ATTR_FORK,
+		.attr_filter	= XFS_ATTR_VERITY,
+		.attr_flags	= XATTR_CREATE,
+		.name		= (const uint8_t *)XFS_VERITY_DESCRIPTOR_NAME,
+		.namelen	= XFS_VERITY_DESCRIPTOR_NAME_LEN,
+		.value		= (void *)desc,
+		.valuelen	= desc_size,
+	};
+	int			error = 0;
+
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+
+	/* fs-verity failed, just cleanup */
+	if (desc == NULL)
+		goto out;
+
+	error = xfs_attr_set(&args);
+	if (error)
+		goto out;
+
+	/* Set fsverity inode flag */
+	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_ichange,
+			0, 0, false, &tp);
+	if (error)
+		goto out;
+
+	/*
+	 * Ensure that we've persisted the verity information before we enable
+	 * it on the inode and tell the caller we have sealed the inode.
+	 */
+	ip->i_diflags2 |= XFS_DIFLAG2_VERITY;
+
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	xfs_trans_set_sync(tp);
+
+	error = xfs_trans_commit(tp);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+	if (!error)
+		inode->i_flags |= S_VERITY;
+
+out:
+	if (error)
+		WARN_ON_ONCE(xfs_drop_merkle_tree(ip, merkle_tree_size,
+						  log_blocksize));
+
+	xfs_iflags_clear(ip, XFS_IVERITY_CONSTRUCTION);
+	return error;
+}
+
+int
+xfs_read_merkle_tree_block(
+	struct inode		*inode,
+	unsigned int		pos,
+	struct fsverity_block	*block,
+	unsigned long		num_ra_pages)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_fsverity_merkle_key name;
+	int			error = 0;
+	struct xfs_da_args	args = {
+		.dp		= ip,
+		.attr_filter	= XFS_ATTR_VERITY,
+		.namelen	= sizeof(struct xfs_fsverity_merkle_key),
+	};
+	xfs_fsverity_merkle_key_to_disk(&name, pos);
+	args.name = (const uint8_t *)&name.merkleoff;
+
+	error = xfs_attr_get(&args);
+	if (error)
+		goto out;
+
+	WARN_ON_ONCE(!args.valuelen);
+
+	/* now we also want to get underlying xfs_buf */
+	args.op_flags = XFS_DA_OP_BUFFER;
+	error = xfs_attr_get(&args);
+	if (error)
+		goto out;
+
+	block->kaddr = args.value;
+	block->len = args.valuelen;
+	block->cached = args.bp->b_flags & XBF_VERITY_CHECKED;
+	block->context = args.bp;
+
+	return error;
+
+out:
+	kmem_free(args.value);
+	if (args.bp)
+		xfs_buf_rele(args.bp);
+	return error;
+}
+
+static int
+xfs_write_merkle_tree_block(
+	struct inode		*inode,
+	const void		*buf,
+	u64			pos,
+	unsigned int		size)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_fsverity_merkle_key	name;
+	struct xfs_da_args	args = {
+		.dp		= ip,
+		.whichfork	= XFS_ATTR_FORK,
+		.attr_filter	= XFS_ATTR_VERITY,
+		.attr_flags	= XATTR_CREATE,
+		.namelen	= sizeof(struct xfs_fsverity_merkle_key),
+		.value		= (void *)buf,
+		.valuelen	= size,
+	};
+
+	xfs_fsverity_merkle_key_to_disk(&name, pos);
+	args.name = (const uint8_t *)&name.merkleoff;
+
+	return xfs_attr_set(&args);
+}
+
+static void
+xfs_drop_block(
+	struct fsverity_block	*block)
+{
+	struct xfs_buf		*buf;
+
+	ASSERT(block != NULL);
+
+	buf = (struct xfs_buf *)block->context;
+
+	if (block->cached)
+		buf->b_flags |= XBF_VERITY_CHECKED;
+	xfs_buf_rele(buf);
+
+	kunmap_local(block->kaddr);
+}
+
+const struct fsverity_operations xfs_verity_ops = {
+	.begin_enable_verity		= &xfs_begin_enable_verity,
+	.end_enable_verity		= &xfs_end_enable_verity,
+	.get_verity_descriptor		= &xfs_get_verity_descriptor,
+	.read_merkle_tree_block		= &xfs_read_merkle_tree_block,
+	.write_merkle_tree_block	= &xfs_write_merkle_tree_block,
+	.drop_block			= &xfs_drop_block,
+};
diff --git a/fs/xfs/xfs_verity.h b/fs/xfs/xfs_verity.h
new file mode 100644
index 000000000000..0f32fd212091
--- /dev/null
+++ b/fs/xfs/xfs_verity.h
@@ -0,0 +1,37 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Red Hat, Inc.
+ */
+#ifndef __XFS_VERITY_H__
+#define __XFS_VERITY_H__
+
+#include "xfs.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include <linux/fsverity.h>
+
+#define XFS_VERITY_DESCRIPTOR_NAME "verity_descriptor"
+#define XFS_VERITY_DESCRIPTOR_NAME_LEN 17
+
+static inline bool
+xfs_verity_merkle_block(
+		struct xfs_da_args *args)
+{
+	if (!(args->attr_filter & XFS_ATTR_VERITY))
+		return false;
+
+	if (!(args->op_flags & XFS_DA_OP_BUFFER))
+		return false;
+
+	if (args->valuelen < 1024 || args->valuelen > PAGE_SIZE ||
+			!is_power_of_2(args->valuelen))
+		return false;
+
+	return true;
+}
+
+#ifdef CONFIG_FS_VERITY
+extern const struct fsverity_operations xfs_verity_ops;
+#endif	/* CONFIG_FS_VERITY */
+
+#endif	/* __XFS_VERITY_H__ */