diff mbox

[v2,4/5] xfs: introduce XFS_DIFLAG2_IOMAP_IMMUTABLE

Message ID 150181371001.32119.599597939855384271.stgit@dwillia2-desk3.amr.corp.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dan Williams Aug. 4, 2017, 2:28 a.m. UTC
Add an on-disk inode flag to record the state of the S_IOMAP_IMMUTABLE
in-memory vfs inode flags. This allows the protections against reflink
and hole punch to be automatically restored on a sub-sequent boot when
the in-memory inode is established.

The FS_XFLAG_IOMAP_IMMUTABLE is introduced to allow xfs_io to read the
state of the flag, but toggling the flag requires going through
fallocate(FALLOC_FL_[UN]SEAL_BLOCK_MAP). Support for toggling this
on-disk state is saved for a later patch.

Cc: Jan Kara <jack@suse.cz>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Suggested-by: Dave Chinner <david@fromorbit.com>
Suggested-by: "Darrick J. Wong" <darrick.wong@oracle.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/xfs/libxfs/xfs_format.h |    5 ++++-
 fs/xfs/xfs_inode.c         |    2 ++
 fs/xfs/xfs_ioctl.c         |    1 +
 fs/xfs/xfs_iops.c          |    8 +++++---
 include/uapi/linux/fs.h    |    1 +
 5 files changed, 13 insertions(+), 4 deletions(-)

Comments

Darrick J. Wong Aug. 4, 2017, 8:33 p.m. UTC | #1
On Thu, Aug 03, 2017 at 07:28:30PM -0700, Dan Williams wrote:
> Add an on-disk inode flag to record the state of the S_IOMAP_IMMUTABLE
> in-memory vfs inode flags. This allows the protections against reflink
> and hole punch to be automatically restored on a sub-sequent boot when
> the in-memory inode is established.
> 
> The FS_XFLAG_IOMAP_IMMUTABLE is introduced to allow xfs_io to read the
> state of the flag, but toggling the flag requires going through
> fallocate(FALLOC_FL_[UN]SEAL_BLOCK_MAP). Support for toggling this
> on-disk state is saved for a later patch.
> 
> Cc: Jan Kara <jack@suse.cz>
> Cc: Jeff Moyer <jmoyer@redhat.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
> Suggested-by: Dave Chinner <david@fromorbit.com>
> Suggested-by: "Darrick J. Wong" <darrick.wong@oracle.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  fs/xfs/libxfs/xfs_format.h |    5 ++++-
>  fs/xfs/xfs_inode.c         |    2 ++
>  fs/xfs/xfs_ioctl.c         |    1 +
>  fs/xfs/xfs_iops.c          |    8 +++++---
>  include/uapi/linux/fs.h    |    1 +
>  5 files changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
> index d4d9bef20c3a..9e720e55776b 100644
> --- a/fs/xfs/libxfs/xfs_format.h
> +++ b/fs/xfs/libxfs/xfs_format.h
> @@ -1063,12 +1063,15 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
>  #define XFS_DIFLAG2_DAX_BIT	0	/* use DAX for this inode */
>  #define XFS_DIFLAG2_REFLINK_BIT	1	/* file's blocks may be shared */
>  #define XFS_DIFLAG2_COWEXTSIZE_BIT   2  /* copy on write extent size hint */
> +#define XFS_DIFLAG2_IOMAP_IMMUTABLE_BIT 3 /* set S_IOMAP_IMMUTABLE for this inode */

So... the greedy part of my brain that doesn't want to give out flags2
bits has been wondering, what if we just didn't have an on-disk
IOMAP_IMMUTABLE bit, and set FS_XFLAG based only on the in-core
S_IOMAP_IMMUTABLE bit?  If a program wants the immutable iomap
semantics, they will have to code some variant on the following:

fd = open(...);
ret = fallocate(fd, FALLOC_FL_SEAL_BLOCK_MAP, 0, len...)
if (ret) {
	printf("couldn't seal block map");
	close(fd);
	return;
}

mmap(fd...);
/* do sensitive io operations here */
munmap(fd...);

close(fd);

Therefore the cost of not having the on-disk flag is that we'll have to
do more unshare/alloc/test/set cycles than we would if we could remember
the iomap-immutable state across unmounts and inode reclaiming.
However, if the data map is already ready to go, this shouldn't have a
lot of overhead since we only have to iterate the in-core extents.

Just trying to make sure we /need/ the inode flag bit. :)

--D

>  #define XFS_DIFLAG2_DAX		(1 << XFS_DIFLAG2_DAX_BIT)
>  #define XFS_DIFLAG2_REFLINK     (1 << XFS_DIFLAG2_REFLINK_BIT)
>  #define XFS_DIFLAG2_COWEXTSIZE  (1 << XFS_DIFLAG2_COWEXTSIZE_BIT)
> +#define XFS_DIFLAG2_IOMAP_IMMUTABLE (1 << XFS_DIFLAG2_IOMAP_IMMUTABLE_BIT)
>  
>  #define XFS_DIFLAG2_ANY \
> -	(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)
> +	(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
> +	 XFS_DIFLAG2_IOMAP_IMMUTABLE)
>  
>  /*
>   * Inode number format:
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index ceef77c0416a..4ca22e272ce6 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -674,6 +674,8 @@ _xfs_dic2xflags(
>  			flags |= FS_XFLAG_DAX;
>  		if (di_flags2 & XFS_DIFLAG2_COWEXTSIZE)
>  			flags |= FS_XFLAG_COWEXTSIZE;
> +		if (di_flags2 & XFS_DIFLAG2_IOMAP_IMMUTABLE)
> +			flags |= FS_XFLAG_IOMAP_IMMUTABLE;
>  	}
>  
>  	if (has_attr)
> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> index 2e64488bc4de..df2eef0f9d45 100644
> --- a/fs/xfs/xfs_ioctl.c
> +++ b/fs/xfs/xfs_ioctl.c
> @@ -978,6 +978,7 @@ xfs_set_diflags(
>  		return;
>  
>  	di_flags2 = (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK);
> +	di_flags2 |= (ip->i_d.di_flags2 & XFS_DIFLAG2_IOMAP_IMMUTABLE);
>  	if (xflags & FS_XFLAG_DAX)
>  		di_flags2 |= XFS_DIFLAG2_DAX;
>  	if (xflags & FS_XFLAG_COWEXTSIZE)
> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> index 469c9fa4c178..174ef95453f5 100644
> --- a/fs/xfs/xfs_iops.c
> +++ b/fs/xfs/xfs_iops.c
> @@ -1186,9 +1186,10 @@ xfs_diflags_to_iflags(
>  	struct xfs_inode	*ip)
>  {
>  	uint16_t		flags = ip->i_d.di_flags;
> +	uint64_t		flags2 = ip->i_d.di_flags2;
>  
>  	inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC |
> -			    S_NOATIME | S_DAX);
> +			    S_NOATIME | S_DAX | S_IOMAP_IMMUTABLE);
>  
>  	if (flags & XFS_DIFLAG_IMMUTABLE)
>  		inode->i_flags |= S_IMMUTABLE;
> @@ -1201,9 +1202,10 @@ xfs_diflags_to_iflags(
>  	if (S_ISREG(inode->i_mode) &&
>  	    ip->i_mount->m_sb.sb_blocksize == PAGE_SIZE &&
>  	    !xfs_is_reflink_inode(ip) &&
> -	    (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
> -	     ip->i_d.di_flags2 & XFS_DIFLAG2_DAX))
> +	    (ip->i_mount->m_flags & XFS_MOUNT_DAX || flags2 & XFS_DIFLAG2_DAX))
>  		inode->i_flags |= S_DAX;
> +	if (flags2 & XFS_DIFLAG2_IOMAP_IMMUTABLE)
> +		inode->i_flags |= S_IOMAP_IMMUTABLE;
>  }
>  
>  /*
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index b7495d05e8de..4765e024ad74 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -182,6 +182,7 @@ struct fsxattr {
>  #define FS_XFLAG_FILESTREAM	0x00004000	/* use filestream allocator */
>  #define FS_XFLAG_DAX		0x00008000	/* use DAX for IO */
>  #define FS_XFLAG_COWEXTSIZE	0x00010000	/* CoW extent size allocator hint */
> +#define FS_XFLAG_IOMAP_IMMUTABLE 0x00020000	/* block map immutable */
>  #define FS_XFLAG_HASATTR	0x80000000	/* no DIFLAG for this	*/
>  
>  /* the read-only stuff doesn't really belong here, but any other place is
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dan Williams Aug. 4, 2017, 8:45 p.m. UTC | #2
On Fri, Aug 4, 2017 at 1:33 PM, Darrick J. Wong <darrick.wong@oracle.com> wrote:
> On Thu, Aug 03, 2017 at 07:28:30PM -0700, Dan Williams wrote:
>> Add an on-disk inode flag to record the state of the S_IOMAP_IMMUTABLE
>> in-memory vfs inode flags. This allows the protections against reflink
>> and hole punch to be automatically restored on a sub-sequent boot when
>> the in-memory inode is established.
>>
>> The FS_XFLAG_IOMAP_IMMUTABLE is introduced to allow xfs_io to read the
>> state of the flag, but toggling the flag requires going through
>> fallocate(FALLOC_FL_[UN]SEAL_BLOCK_MAP). Support for toggling this
>> on-disk state is saved for a later patch.
>>
>> Cc: Jan Kara <jack@suse.cz>
>> Cc: Jeff Moyer <jmoyer@redhat.com>
>> Cc: Christoph Hellwig <hch@lst.de>
>> Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
>> Suggested-by: Dave Chinner <david@fromorbit.com>
>> Suggested-by: "Darrick J. Wong" <darrick.wong@oracle.com>
>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>> ---
>>  fs/xfs/libxfs/xfs_format.h |    5 ++++-
>>  fs/xfs/xfs_inode.c         |    2 ++
>>  fs/xfs/xfs_ioctl.c         |    1 +
>>  fs/xfs/xfs_iops.c          |    8 +++++---
>>  include/uapi/linux/fs.h    |    1 +
>>  5 files changed, 13 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
>> index d4d9bef20c3a..9e720e55776b 100644
>> --- a/fs/xfs/libxfs/xfs_format.h
>> +++ b/fs/xfs/libxfs/xfs_format.h
>> @@ -1063,12 +1063,15 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
>>  #define XFS_DIFLAG2_DAX_BIT  0       /* use DAX for this inode */
>>  #define XFS_DIFLAG2_REFLINK_BIT      1       /* file's blocks may be shared */
>>  #define XFS_DIFLAG2_COWEXTSIZE_BIT   2  /* copy on write extent size hint */
>> +#define XFS_DIFLAG2_IOMAP_IMMUTABLE_BIT 3 /* set S_IOMAP_IMMUTABLE for this inode */
>
> So... the greedy part of my brain that doesn't want to give out flags2
> bits has been wondering, what if we just didn't have an on-disk
> IOMAP_IMMUTABLE bit, and set FS_XFLAG based only on the in-core
> S_IOMAP_IMMUTABLE bit?  If a program wants the immutable iomap
> semantics, they will have to code some variant on the following:
>
> fd = open(...);
> ret = fallocate(fd, FALLOC_FL_SEAL_BLOCK_MAP, 0, len...)
> if (ret) {
>         printf("couldn't seal block map");
>         close(fd);
>         return;
> }
>
> mmap(fd...);
> /* do sensitive io operations here */
> munmap(fd...);
>
> close(fd);
>
> Therefore the cost of not having the on-disk flag is that we'll have to
> do more unshare/alloc/test/set cycles than we would if we could remember
> the iomap-immutable state across unmounts and inode reclaiming.
> However, if the data map is already ready to go, this shouldn't have a
> lot of overhead since we only have to iterate the in-core extents.
>
> Just trying to make sure we /need/ the inode flag bit. :)

A fair point.

The use case I imagine is a privileged (CAP_IMMUTABLE) process setting
up the data file space at server provisioning time, and then
unprivileged processes using the immutable semantic thereafter.
Dave Chinner Aug. 4, 2017, 11:46 p.m. UTC | #3
On Fri, Aug 04, 2017 at 01:33:12PM -0700, Darrick J. Wong wrote:
> On Thu, Aug 03, 2017 at 07:28:30PM -0700, Dan Williams wrote:
> > Add an on-disk inode flag to record the state of the S_IOMAP_IMMUTABLE
> > in-memory vfs inode flags. This allows the protections against reflink
> > and hole punch to be automatically restored on a sub-sequent boot when
> > the in-memory inode is established.
> > 
> > The FS_XFLAG_IOMAP_IMMUTABLE is introduced to allow xfs_io to read the
> > state of the flag, but toggling the flag requires going through
> > fallocate(FALLOC_FL_[UN]SEAL_BLOCK_MAP). Support for toggling this
> > on-disk state is saved for a later patch.
> > 
> > Cc: Jan Kara <jack@suse.cz>
> > Cc: Jeff Moyer <jmoyer@redhat.com>
> > Cc: Christoph Hellwig <hch@lst.de>
> > Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
> > Suggested-by: Dave Chinner <david@fromorbit.com>
> > Suggested-by: "Darrick J. Wong" <darrick.wong@oracle.com>
> > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> > ---
> >  fs/xfs/libxfs/xfs_format.h |    5 ++++-
> >  fs/xfs/xfs_inode.c         |    2 ++
> >  fs/xfs/xfs_ioctl.c         |    1 +
> >  fs/xfs/xfs_iops.c          |    8 +++++---
> >  include/uapi/linux/fs.h    |    1 +
> >  5 files changed, 13 insertions(+), 4 deletions(-)
> > 
> > diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
> > index d4d9bef20c3a..9e720e55776b 100644
> > --- a/fs/xfs/libxfs/xfs_format.h
> > +++ b/fs/xfs/libxfs/xfs_format.h
> > @@ -1063,12 +1063,15 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
> >  #define XFS_DIFLAG2_DAX_BIT	0	/* use DAX for this inode */
> >  #define XFS_DIFLAG2_REFLINK_BIT	1	/* file's blocks may be shared */
> >  #define XFS_DIFLAG2_COWEXTSIZE_BIT   2  /* copy on write extent size hint */
> > +#define XFS_DIFLAG2_IOMAP_IMMUTABLE_BIT 3 /* set S_IOMAP_IMMUTABLE for this inode */
> 
> So... the greedy part of my brain that doesn't want to give out flags2
> bits has been wondering,

FWIW, I made di_flags2 a 64 bit value in the first place precisely
so we didn't have a scarcity problem and can just give out flag
bits for enabling new functionality like this...

> what if we just didn't have an on-disk
> IOMAP_IMMUTABLE bit, and set FS_XFLAG based only on the in-core
> S_IOMAP_IMMUTABLE bit?  If a program wants the immutable iomap
> semantics, they will have to code some variant on the following:
> 
> fd = open(...);
> ret = fallocate(fd, FALLOC_FL_SEAL_BLOCK_MAP, 0, len...)
> if (ret) {
> 	printf("couldn't seal block map");
> 	close(fd);
> 	return;
> }
> 
> mmap(fd...);
> /* do sensitive io operations here */
> munmap(fd...);
> 
> close(fd);
> 
> Therefore the cost of not having the on-disk flag is that we'll have to
> do more unshare/alloc/test/set cycles than we would if we could remember
> the iomap-immutable state across unmounts and inode reclaiming.
> However, if the data map is already ready to go, this shouldn't have a
> lot of overhead since we only have to iterate the in-core extents.
> 
> Just trying to make sure we /need/ the inode flag bit. :)

IMO, fallocate() is for making permanent changes to file extents. If
this is not going to be a permanent state change but only a
runtime-while-the-inode-is-in-cache flag, then it's probably not the
right interface to use.

This also seems problematic for applications other than DAX where
the block map may be sealed, the fd closed and access handed off to
another entity for remote storage access. If the inode gets
reclaimed due to memory pressure, the system loses the fact that
that the inode has been sealed. Hence another process can come
along, re-read the inode and modify the block map because it hasn't
been sealed in this new cache life cycle.....

Cheers,

Dave.
Darrick J. Wong Aug. 4, 2017, 11:57 p.m. UTC | #4
On Sat, Aug 05, 2017 at 09:46:15AM +1000, Dave Chinner wrote:
> On Fri, Aug 04, 2017 at 01:33:12PM -0700, Darrick J. Wong wrote:
> > On Thu, Aug 03, 2017 at 07:28:30PM -0700, Dan Williams wrote:
> > > Add an on-disk inode flag to record the state of the S_IOMAP_IMMUTABLE
> > > in-memory vfs inode flags. This allows the protections against reflink
> > > and hole punch to be automatically restored on a sub-sequent boot when
> > > the in-memory inode is established.
> > > 
> > > The FS_XFLAG_IOMAP_IMMUTABLE is introduced to allow xfs_io to read the
> > > state of the flag, but toggling the flag requires going through
> > > fallocate(FALLOC_FL_[UN]SEAL_BLOCK_MAP). Support for toggling this
> > > on-disk state is saved for a later patch.
> > > 
> > > Cc: Jan Kara <jack@suse.cz>
> > > Cc: Jeff Moyer <jmoyer@redhat.com>
> > > Cc: Christoph Hellwig <hch@lst.de>
> > > Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
> > > Suggested-by: Dave Chinner <david@fromorbit.com>
> > > Suggested-by: "Darrick J. Wong" <darrick.wong@oracle.com>
> > > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> > > ---
> > >  fs/xfs/libxfs/xfs_format.h |    5 ++++-
> > >  fs/xfs/xfs_inode.c         |    2 ++
> > >  fs/xfs/xfs_ioctl.c         |    1 +
> > >  fs/xfs/xfs_iops.c          |    8 +++++---
> > >  include/uapi/linux/fs.h    |    1 +
> > >  5 files changed, 13 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
> > > index d4d9bef20c3a..9e720e55776b 100644
> > > --- a/fs/xfs/libxfs/xfs_format.h
> > > +++ b/fs/xfs/libxfs/xfs_format.h
> > > @@ -1063,12 +1063,15 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
> > >  #define XFS_DIFLAG2_DAX_BIT	0	/* use DAX for this inode */
> > >  #define XFS_DIFLAG2_REFLINK_BIT	1	/* file's blocks may be shared */
> > >  #define XFS_DIFLAG2_COWEXTSIZE_BIT   2  /* copy on write extent size hint */
> > > +#define XFS_DIFLAG2_IOMAP_IMMUTABLE_BIT 3 /* set S_IOMAP_IMMUTABLE for this inode */
> > 
> > So... the greedy part of my brain that doesn't want to give out flags2
> > bits has been wondering,
> 
> FWIW, I made di_flags2 a 64 bit value in the first place precisely
> so we didn't have a scarcity problem and can just give out flag
> bits for enabling new functionality like this...

Ok.  That's what I thought.

> > what if we just didn't have an on-disk
> > IOMAP_IMMUTABLE bit, and set FS_XFLAG based only on the in-core
> > S_IOMAP_IMMUTABLE bit?  If a program wants the immutable iomap
> > semantics, they will have to code some variant on the following:
> > 
> > fd = open(...);
> > ret = fallocate(fd, FALLOC_FL_SEAL_BLOCK_MAP, 0, len...)
> > if (ret) {
> > 	printf("couldn't seal block map");
> > 	close(fd);
> > 	return;
> > }
> > 
> > mmap(fd...);
> > /* do sensitive io operations here */
> > munmap(fd...);
> > 
> > close(fd);
> > 
> > Therefore the cost of not having the on-disk flag is that we'll have to
> > do more unshare/alloc/test/set cycles than we would if we could remember
> > the iomap-immutable state across unmounts and inode reclaiming.
> > However, if the data map is already ready to go, this shouldn't have a
> > lot of overhead since we only have to iterate the in-core extents.
> > 
> > Just trying to make sure we /need/ the inode flag bit. :)
> 
> IMO, fallocate() is for making permanent changes to file extents. If
> this is not going to be a permanent state change but only a
> runtime-while-the-inode-is-in-cache flag, then it's probably not the
> right interface to use.
> 
> This also seems problematic for applications other than DAX where
> the block map may be sealed, the fd closed and access handed off to
> another entity for remote storage access. If the inode gets
> reclaimed due to memory pressure, the system loses the fact that
> that the inode has been sealed. Hence another process can come
> along, re-read the inode and modify the block map because it hasn't
> been sealed in this new cache life cycle.....

<nod> Ok, I'm convinced. :)

--D

> 
> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index d4d9bef20c3a..9e720e55776b 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1063,12 +1063,15 @@  static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
 #define XFS_DIFLAG2_DAX_BIT	0	/* use DAX for this inode */
 #define XFS_DIFLAG2_REFLINK_BIT	1	/* file's blocks may be shared */
 #define XFS_DIFLAG2_COWEXTSIZE_BIT   2  /* copy on write extent size hint */
+#define XFS_DIFLAG2_IOMAP_IMMUTABLE_BIT 3 /* set S_IOMAP_IMMUTABLE for this inode */
 #define XFS_DIFLAG2_DAX		(1 << XFS_DIFLAG2_DAX_BIT)
 #define XFS_DIFLAG2_REFLINK     (1 << XFS_DIFLAG2_REFLINK_BIT)
 #define XFS_DIFLAG2_COWEXTSIZE  (1 << XFS_DIFLAG2_COWEXTSIZE_BIT)
+#define XFS_DIFLAG2_IOMAP_IMMUTABLE (1 << XFS_DIFLAG2_IOMAP_IMMUTABLE_BIT)
 
 #define XFS_DIFLAG2_ANY \
-	(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)
+	(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
+	 XFS_DIFLAG2_IOMAP_IMMUTABLE)
 
 /*
  * Inode number format:
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ceef77c0416a..4ca22e272ce6 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -674,6 +674,8 @@  _xfs_dic2xflags(
 			flags |= FS_XFLAG_DAX;
 		if (di_flags2 & XFS_DIFLAG2_COWEXTSIZE)
 			flags |= FS_XFLAG_COWEXTSIZE;
+		if (di_flags2 & XFS_DIFLAG2_IOMAP_IMMUTABLE)
+			flags |= FS_XFLAG_IOMAP_IMMUTABLE;
 	}
 
 	if (has_attr)
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 2e64488bc4de..df2eef0f9d45 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -978,6 +978,7 @@  xfs_set_diflags(
 		return;
 
 	di_flags2 = (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK);
+	di_flags2 |= (ip->i_d.di_flags2 & XFS_DIFLAG2_IOMAP_IMMUTABLE);
 	if (xflags & FS_XFLAG_DAX)
 		di_flags2 |= XFS_DIFLAG2_DAX;
 	if (xflags & FS_XFLAG_COWEXTSIZE)
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 469c9fa4c178..174ef95453f5 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1186,9 +1186,10 @@  xfs_diflags_to_iflags(
 	struct xfs_inode	*ip)
 {
 	uint16_t		flags = ip->i_d.di_flags;
+	uint64_t		flags2 = ip->i_d.di_flags2;
 
 	inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC |
-			    S_NOATIME | S_DAX);
+			    S_NOATIME | S_DAX | S_IOMAP_IMMUTABLE);
 
 	if (flags & XFS_DIFLAG_IMMUTABLE)
 		inode->i_flags |= S_IMMUTABLE;
@@ -1201,9 +1202,10 @@  xfs_diflags_to_iflags(
 	if (S_ISREG(inode->i_mode) &&
 	    ip->i_mount->m_sb.sb_blocksize == PAGE_SIZE &&
 	    !xfs_is_reflink_inode(ip) &&
-	    (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
-	     ip->i_d.di_flags2 & XFS_DIFLAG2_DAX))
+	    (ip->i_mount->m_flags & XFS_MOUNT_DAX || flags2 & XFS_DIFLAG2_DAX))
 		inode->i_flags |= S_DAX;
+	if (flags2 & XFS_DIFLAG2_IOMAP_IMMUTABLE)
+		inode->i_flags |= S_IOMAP_IMMUTABLE;
 }
 
 /*
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index b7495d05e8de..4765e024ad74 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -182,6 +182,7 @@  struct fsxattr {
 #define FS_XFLAG_FILESTREAM	0x00004000	/* use filestream allocator */
 #define FS_XFLAG_DAX		0x00008000	/* use DAX for IO */
 #define FS_XFLAG_COWEXTSIZE	0x00010000	/* CoW extent size allocator hint */
+#define FS_XFLAG_IOMAP_IMMUTABLE 0x00020000	/* block map immutable */
 #define FS_XFLAG_HASATTR	0x80000000	/* no DIFLAG for this	*/
 
 /* the read-only stuff doesn't really belong here, but any other place is