diff mbox series

[2/2] xfs: fix fallocate functions when rtextsize is larger than 1

Message ID 160235127396.1384192.5095447151831725417.stgit@magnolia
State Accepted
Headers show
Series xfs: hopefully the last few rt fixes | expand

Commit Message

Darrick J. Wong Oct. 10, 2020, 5:34 p.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

In commit fe341eb151ec, I forgot that xfs_free_file_space isn't strictly
a "remove mapped blocks" function.  It is actually a function to zero
file space by punching out the middle and writing zeroes to the
unaligned ends of the specified range.  Therefore, putting a rtextsize
alignment check in that function is wrong because that breaks unaligned
ZERO_RANGE on the realtime volume.

Furthermore, xfs_file_fallocate already has alignment checks for the
functions require the file range to be aligned to the size of a
fundamental allocation unit (which is 1 FSB on the data volume and 1 rt
extent on the realtime volume).  Create a new helper to return the
desired allocation unit size, fix the fallocate frontend to use it,
fix free_file_space to delete the correct range, and remove a now
redundant check from insert_file_space.

Fixes: fe341eb151ec ("xfs: ensure that fpunch, fcollapse, and finsert operations are aligned to rt extent size")
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_util.c |   17 ++++-------------
 fs/xfs/xfs_file.c      |   10 ++++------
 fs/xfs/xfs_inode.c     |   13 +++++++++++++
 fs/xfs/xfs_inode.h     |    1 +
 4 files changed, 22 insertions(+), 19 deletions(-)

Comments

Chandan Babu R Oct. 12, 2020, 6:28 a.m. UTC | #1
On Saturday 10 October 2020 11:04:34 PM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> In commit fe341eb151ec, I forgot that xfs_free_file_space isn't strictly
> a "remove mapped blocks" function.  It is actually a function to zero
> file space by punching out the middle and writing zeroes to the
> unaligned ends of the specified range.  Therefore, putting a rtextsize
> alignment check in that function is wrong because that breaks unaligned
> ZERO_RANGE on the realtime volume.
> 
> Furthermore, xfs_file_fallocate already has alignment checks for the
> functions require the file range to be aligned to the size of a
> fundamental allocation unit (which is 1 FSB on the data volume and 1 rt
> extent on the realtime volume).  Create a new helper to return the
> desired allocation unit size, fix the fallocate frontend to use it,
> fix free_file_space to delete the correct range, and remove a now
> redundant check from insert_file_space.
>

The changes look good to me.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Fixes: fe341eb151ec ("xfs: ensure that fpunch, fcollapse, and finsert operations are aligned to rt extent size")
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_bmap_util.c |   17 ++++-------------
>  fs/xfs/xfs_file.c      |   10 ++++------
>  fs/xfs/xfs_inode.c     |   13 +++++++++++++
>  fs/xfs/xfs_inode.h     |    1 +
>  4 files changed, 22 insertions(+), 19 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
> index f2a8a0e75e1f..52cddcfee8a1 100644
> --- a/fs/xfs/xfs_bmap_util.c
> +++ b/fs/xfs/xfs_bmap_util.c
> @@ -947,11 +947,10 @@ xfs_free_file_space(
>  	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
>  
>  	/* We can only free complete realtime extents. */
> -	if (XFS_IS_REALTIME_INODE(ip)) {
> -		xfs_extlen_t	extsz = xfs_get_extsz_hint(ip);
> -
> -		if ((startoffset_fsb | endoffset_fsb) & (extsz - 1))
> -			return -EINVAL;
> +	if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 0) {
> +		startoffset_fsb = round_up(startoffset_fsb,
> +					   mp->m_sb.sb_rextsize);
> +		endoffset_fsb = round_down(endoffset_fsb, mp->m_sb.sb_rextsize);
>  	}
>  
>  	/*
> @@ -1147,14 +1146,6 @@ xfs_insert_file_space(
>  
>  	trace_xfs_insert_file_space(ip);
>  
> -	/* We can only insert complete realtime extents. */
> -	if (XFS_IS_REALTIME_INODE(ip)) {
> -		xfs_extlen_t	extsz = xfs_get_extsz_hint(ip);
> -
> -		if ((stop_fsb | shift_fsb) & (extsz - 1))
> -			return -EINVAL;
> -	}
> -
>  	error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb);
>  	if (error)
>  		return error;
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 3d1b95124744..e9b4b1dada75 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -803,6 +803,8 @@ xfs_file_fallocate(
>  	enum xfs_prealloc_flags	flags = 0;
>  	uint			iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
>  	loff_t			new_size = 0;
> +	unsigned int		blksize = xfs_inode_alloc_blocksize(ip);
> +	unsigned int		blkmask = blksize - 1;
>  	bool			do_file_insert = false;
>  
>  	if (!S_ISREG(inode->i_mode))
> @@ -850,9 +852,7 @@ xfs_file_fallocate(
>  		if (error)
>  			goto out_unlock;
>  	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
> -		unsigned int blksize_mask = i_blocksize(inode) - 1;
> -
> -		if (offset & blksize_mask || len & blksize_mask) {
> +		if ((offset | len) & blkmask) {
>  			error = -EINVAL;
>  			goto out_unlock;
>  		}
> @@ -872,10 +872,9 @@ xfs_file_fallocate(
>  		if (error)
>  			goto out_unlock;
>  	} else if (mode & FALLOC_FL_INSERT_RANGE) {
> -		unsigned int	blksize_mask = i_blocksize(inode) - 1;
>  		loff_t		isize = i_size_read(inode);
>  
> -		if (offset & blksize_mask || len & blksize_mask) {
> +		if ((offset | len) & blkmask) {
>  			error = -EINVAL;
>  			goto out_unlock;
>  		}
> @@ -917,7 +916,6 @@ xfs_file_fallocate(
>  			 *   2.) If prealloc returns ENOSPC, the file range is
>  			 *       still zero-valued by virtue of the hole punch.
>  			 */
> -			unsigned int blksize = i_blocksize(inode);
>  
>  			trace_xfs_zero_file_space(ip);
>  
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 2bfbcf28b1bd..20bb5fae0d00 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -3813,3 +3813,16 @@ xfs_iunlock2_io_mmap(
>  	if (!same_inode)
>  		inode_unlock(VFS_I(ip1));
>  }
> +
> +/* Returns the size of fundamental allocation unit for a file, in bytes. */
> +unsigned int
> +xfs_inode_alloc_blocksize(
> +	struct xfs_inode	*ip)
> +{
> +	unsigned int		blocks = 1;
> +
> +	if (XFS_IS_REALTIME_INODE(ip))
> +		blocks = ip->i_mount->m_sb.sb_rextsize;
> +
> +	return XFS_FSB_TO_B(ip->i_mount, blocks);
> +}
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index 751a3d1d7d84..270b35d9dcb0 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -475,5 +475,6 @@ void xfs_end_io(struct work_struct *work);
>  
>  int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
>  void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
> +unsigned int xfs_inode_alloc_blocksize(struct xfs_inode *ip);
>  
>  #endif	/* __XFS_INODE_H__ */
> 
>
Christoph Hellwig Oct. 15, 2020, 7:54 a.m. UTC | #2
I don't really like the xfs_inode_alloc_blocksize helper, given that
it is very easy to be confused with the allocsize concept.

I'd just add a helper ala:

static bool
xfs_falloc_is_unaligned(
	struct inode		*inode,
	loff_t			offset,
	loff_t			len)
{
	struct xfs_mount	*mp = XFS_I(ip)->i_mount;

	unsigned int blksize_mask = i_blocksize(inode) - 1;

	if (XFS_IS_REALTIME_INODE(XFS_I(ip)))
		blksize_mask = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize) - 1;

	return (offset & blksize_mask) || (len & blksize_mask);
}
Darrick J. Wong Oct. 16, 2020, 10:02 p.m. UTC | #3
On Thu, Oct 15, 2020 at 09:54:39AM +0200, Christoph Hellwig wrote:
> I don't really like the xfs_inode_alloc_blocksize helper, given that
> it is very easy to be confused with the allocsize concept.
> 
> I'd just add a helper ala:
> 
> static bool
> xfs_falloc_is_unaligned(
> 	struct inode		*inode,
> 	loff_t			offset,
> 	loff_t			len)
> {
> 	struct xfs_mount	*mp = XFS_I(ip)->i_mount;
> 
> 	unsigned int blksize_mask = i_blocksize(inode) - 1;
> 
> 	if (XFS_IS_REALTIME_INODE(XFS_I(ip)))
> 		blksize_mask = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize) - 1;

UGH the thing that I just noticed (and fstests doesn't seem to cover
given the number of them that sort of blew up) is the fact that the rt
extent size only has to be some multiple of the fs blocksize, not the
power-of-2 multiple that I mistakenly assumed.

Soooo none of this masking stuff actually works properly and I'm going
to drop this patch until I figure out how to do this properly, with a
bunch of fugly division and whatnot... I guess the silver lining is that
rtextsize can't be larger than 1G so at least I can probably use simple
division and not the div64 mess.

Self NAK.

--D

> 
> 	return (offset & blksize_mask) || (len & blksize_mask);
> }
diff mbox series

Patch

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index f2a8a0e75e1f..52cddcfee8a1 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -947,11 +947,10 @@  xfs_free_file_space(
 	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
 
 	/* We can only free complete realtime extents. */
-	if (XFS_IS_REALTIME_INODE(ip)) {
-		xfs_extlen_t	extsz = xfs_get_extsz_hint(ip);
-
-		if ((startoffset_fsb | endoffset_fsb) & (extsz - 1))
-			return -EINVAL;
+	if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 0) {
+		startoffset_fsb = round_up(startoffset_fsb,
+					   mp->m_sb.sb_rextsize);
+		endoffset_fsb = round_down(endoffset_fsb, mp->m_sb.sb_rextsize);
 	}
 
 	/*
@@ -1147,14 +1146,6 @@  xfs_insert_file_space(
 
 	trace_xfs_insert_file_space(ip);
 
-	/* We can only insert complete realtime extents. */
-	if (XFS_IS_REALTIME_INODE(ip)) {
-		xfs_extlen_t	extsz = xfs_get_extsz_hint(ip);
-
-		if ((stop_fsb | shift_fsb) & (extsz - 1))
-			return -EINVAL;
-	}
-
 	error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb);
 	if (error)
 		return error;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 3d1b95124744..e9b4b1dada75 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -803,6 +803,8 @@  xfs_file_fallocate(
 	enum xfs_prealloc_flags	flags = 0;
 	uint			iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
 	loff_t			new_size = 0;
+	unsigned int		blksize = xfs_inode_alloc_blocksize(ip);
+	unsigned int		blkmask = blksize - 1;
 	bool			do_file_insert = false;
 
 	if (!S_ISREG(inode->i_mode))
@@ -850,9 +852,7 @@  xfs_file_fallocate(
 		if (error)
 			goto out_unlock;
 	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
-		unsigned int blksize_mask = i_blocksize(inode) - 1;
-
-		if (offset & blksize_mask || len & blksize_mask) {
+		if ((offset | len) & blkmask) {
 			error = -EINVAL;
 			goto out_unlock;
 		}
@@ -872,10 +872,9 @@  xfs_file_fallocate(
 		if (error)
 			goto out_unlock;
 	} else if (mode & FALLOC_FL_INSERT_RANGE) {
-		unsigned int	blksize_mask = i_blocksize(inode) - 1;
 		loff_t		isize = i_size_read(inode);
 
-		if (offset & blksize_mask || len & blksize_mask) {
+		if ((offset | len) & blkmask) {
 			error = -EINVAL;
 			goto out_unlock;
 		}
@@ -917,7 +916,6 @@  xfs_file_fallocate(
 			 *   2.) If prealloc returns ENOSPC, the file range is
 			 *       still zero-valued by virtue of the hole punch.
 			 */
-			unsigned int blksize = i_blocksize(inode);
 
 			trace_xfs_zero_file_space(ip);
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 2bfbcf28b1bd..20bb5fae0d00 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3813,3 +3813,16 @@  xfs_iunlock2_io_mmap(
 	if (!same_inode)
 		inode_unlock(VFS_I(ip1));
 }
+
+/* Returns the size of fundamental allocation unit for a file, in bytes. */
+unsigned int
+xfs_inode_alloc_blocksize(
+	struct xfs_inode	*ip)
+{
+	unsigned int		blocks = 1;
+
+	if (XFS_IS_REALTIME_INODE(ip))
+		blocks = ip->i_mount->m_sb.sb_rextsize;
+
+	return XFS_FSB_TO_B(ip->i_mount, blocks);
+}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 751a3d1d7d84..270b35d9dcb0 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -475,5 +475,6 @@  void xfs_end_io(struct work_struct *work);
 
 int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
 void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
+unsigned int xfs_inode_alloc_blocksize(struct xfs_inode *ip);
 
 #endif	/* __XFS_INODE_H__ */