diff mbox series

[v7,13/14] xfs: update atomic write limits

Message ID 20250415121425.4146847-14-john.g.garry@oracle.com (mailing list archive)
State New
Headers show
Series large atomic writes for xfs | expand

Commit Message

John Garry April 15, 2025, 12:14 p.m. UTC
Update the limits returned from xfs_get_atomic_write_{min, max, max_opt}().

No reflink support always means no CoW-based atomic writes.

For xfs_get_atomic_write_min(), we only ever report a single blocksize, and
reporting that depends on either HW or reflink support.

For xfs_get_atomic_write_max(), without reflink we are limited to a single
blocksize, and only if the HW supports it. Otherwise we are limited to the
combined limit in mp->m_atomic_write_unit_max.

For xfs_get_atomic_write_max_opt(), we are ultimately limited by the bdev
atomic write limit. If xfs_get_atomic_write_max() does not report more than
1x blocksize, then just continue to report 0 as before.

Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
[djwong: update comments in the helper functions]
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 fs/xfs/xfs_file.c |  2 +-
 fs/xfs/xfs_iops.c | 53 +++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 48 insertions(+), 7 deletions(-)

Comments

Darrick J. Wong April 15, 2025, 4:26 p.m. UTC | #1
On Tue, Apr 15, 2025 at 12:14:24PM +0000, John Garry wrote:
> Update the limits returned from xfs_get_atomic_write_{min, max, max_opt}().
> 
> No reflink support always means no CoW-based atomic writes.
> 
> For updating xfs_get_atomic_write_min(), we support blocksize only and that
> depends on HW or reflink support.
> 
> For updating xfs_get_atomic_write_max(), for no reflink, we are limited to
> blocksize but only if HW support. Otherwise we are limited to combined
> limit in mp->m_atomic_write_unit_max.
> 
> For updating xfs_get_atomic_write_max_opt(), ultimately we are limited by
> the bdev atomic write limit. If xfs_get_atomic_write_max() does not report
>  > 1x blocksize, then just continue to report 0 as before.
> 
> Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
> [djwong: update comments in the helper functions]

Same here, there should be a
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>

after this comment.

--D

> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>  fs/xfs/xfs_file.c |  2 +-
>  fs/xfs/xfs_iops.c | 53 +++++++++++++++++++++++++++++++++++++++++------
>  2 files changed, 48 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 81a377f65aa3..d1ddbc4a98c3 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1557,7 +1557,7 @@ xfs_file_open(
>  	if (xfs_is_shutdown(XFS_M(inode->i_sb)))
>  		return -EIO;
>  	file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
> -	if (xfs_inode_can_hw_atomicwrite(XFS_I(inode)))
> +	if (xfs_get_atomic_write_min(XFS_I(inode)))
>  		file->f_mode |= FMODE_CAN_ATOMIC_WRITE;
>  	return generic_file_open(inode, file);
>  }
> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> index 3b5aa39dbfe9..183524d06bc3 100644
> --- a/fs/xfs/xfs_iops.c
> +++ b/fs/xfs/xfs_iops.c
> @@ -605,27 +605,68 @@ unsigned int
>  xfs_get_atomic_write_min(
>  	struct xfs_inode	*ip)
>  {
> -	if (!xfs_inode_can_hw_atomicwrite(ip))
> -		return 0;
> +	struct xfs_mount	*mp = ip->i_mount;
> +
> +	/*
> +	 * If we can complete an atomic write via atomic out of place writes,
> +	 * then advertise a minimum size of one fsblock.  Without this
> +	 * mechanism, we can only guarantee atomic writes up to a single LBA.
> +	 *
> +	 * If out of place writes are not available, we can guarantee an atomic
> +	 * write of exactly one single fsblock if the bdev will make that
> +	 * guarantee for us.
> +	 */
> +	if (xfs_inode_can_hw_atomicwrite(ip) || xfs_has_reflink(mp))
> +		return mp->m_sb.sb_blocksize;
>  
> -	return ip->i_mount->m_sb.sb_blocksize;
> +	return 0;
>  }
>  
>  unsigned int
>  xfs_get_atomic_write_max(
>  	struct xfs_inode	*ip)
>  {
> -	if (!xfs_inode_can_hw_atomicwrite(ip))
> +	struct xfs_mount	*mp = ip->i_mount;
> +
> +	/*
> +	 * If out of place writes are not available, we can guarantee an atomic
> +	 * write of exactly one single fsblock if the bdev will make that
> +	 * guarantee for us.
> +	 */
> +	if (!xfs_has_reflink(mp)) {
> +		if (xfs_inode_can_hw_atomicwrite(ip))
> +			return mp->m_sb.sb_blocksize;
>  		return 0;
> +	}
>  
> -	return ip->i_mount->m_sb.sb_blocksize;
> +	/*
> +	 * If we can complete an atomic write via atomic out of place writes,
> +	 * then advertise a maximum size of whatever we can complete through
> +	 * that means.  Hardware support is reported via max_opt, not here.
> +	 */
> +	if (XFS_IS_REALTIME_INODE(ip))
> +		return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_RTG].awu_max);
> +	return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_AG].awu_max);
>  }
>  
>  unsigned int
>  xfs_get_atomic_write_max_opt(
>  	struct xfs_inode	*ip)
>  {
> -	return 0;
> +	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
> +	unsigned int		awu_max = xfs_get_atomic_write_max(ip);
> +
> +	/* if the max is 1x block, then just keep behaviour that opt is 0 */
> +	if (awu_max <= ip->i_mount->m_sb.sb_blocksize)
> +		return 0;
> +
> +	/*
> +	 * Advertise the maximum size of an atomic write that we can tell the
> +	 * block device to perform for us.  In general the bdev limit will be
> +	 * less than our out of place write limit, but we don't want to exceed
> +	 * the awu_max.
> +	 */
> +	return min(awu_max, target->bt_bdev_awu_max);
>  }
>  
>  static void
> -- 
> 2.31.1
> 
>
diff mbox series

Patch

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 81a377f65aa3..d1ddbc4a98c3 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1557,7 +1557,7 @@  xfs_file_open(
 	if (xfs_is_shutdown(XFS_M(inode->i_sb)))
 		return -EIO;
 	file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
-	if (xfs_inode_can_hw_atomicwrite(XFS_I(inode)))
+	if (xfs_get_atomic_write_min(XFS_I(inode)))
 		file->f_mode |= FMODE_CAN_ATOMIC_WRITE;
 	return generic_file_open(inode, file);
 }
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 3b5aa39dbfe9..183524d06bc3 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -605,27 +605,68 @@  unsigned int
 xfs_get_atomic_write_min(
 	struct xfs_inode	*ip)
 {
-	if (!xfs_inode_can_hw_atomicwrite(ip))
-		return 0;
+	struct xfs_mount	*mp = ip->i_mount;
+
+	/*
+	 * If we can complete an atomic write via atomic out of place writes,
+	 * then advertise a minimum size of one fsblock.  Without this
+	 * mechanism, we can only guarantee atomic writes up to a single LBA.
+	 *
+	 * If out of place writes are not available, we can guarantee an atomic
+	 * write of exactly one single fsblock if the bdev will make that
+	 * guarantee for us.
+	 */
+	if (xfs_inode_can_hw_atomicwrite(ip) || xfs_has_reflink(mp))
+		return mp->m_sb.sb_blocksize;
 
-	return ip->i_mount->m_sb.sb_blocksize;
+	return 0;
 }
 
 unsigned int
 xfs_get_atomic_write_max(
 	struct xfs_inode	*ip)
 {
-	if (!xfs_inode_can_hw_atomicwrite(ip))
+	struct xfs_mount	*mp = ip->i_mount;
+
+	/*
+	 * If out of place writes are not available, we can guarantee an atomic
+	 * write of exactly one single fsblock if the bdev will make that
+	 * guarantee for us.
+	 */
+	if (!xfs_has_reflink(mp)) {
+		if (xfs_inode_can_hw_atomicwrite(ip))
+			return mp->m_sb.sb_blocksize;
 		return 0;
+	}
 
-	return ip->i_mount->m_sb.sb_blocksize;
+	/*
+	 * If we can complete an atomic write via atomic out of place writes,
+	 * then advertise a maximum size of whatever we can complete through
+	 * that means.  Hardware support is reported via max_opt, not here.
+	 */
+	if (XFS_IS_REALTIME_INODE(ip))
+		return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_RTG].awu_max);
+	return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_AG].awu_max);
 }
 
 unsigned int
 xfs_get_atomic_write_max_opt(
 	struct xfs_inode	*ip)
 {
-	return 0;
+	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
+	unsigned int		awu_max = xfs_get_atomic_write_max(ip);
+
+	/* if the max is 1x block, then just keep behaviour that opt is 0 */
+	if (awu_max <= ip->i_mount->m_sb.sb_blocksize)
+		return 0;
+
+	/*
+	 * Advertise the maximum size of an atomic write that we can tell the
+	 * block device to perform for us.  In general the bdev limit will be
+	 * less than our out of place write limit, but we don't want to exceed
+	 * the awu_max.
+	 */
+	return min(awu_max, target->bt_bdev_awu_max);
 }
 
 static void