diff mbox series

[v5,08/10] xfs: Update atomic write max size

Message ID 20250310183946.932054-9-john.g.garry@oracle.com (mailing list archive)
State New
Headers show
Series large atomic writes for xfs with CoW | expand

Commit Message

John Garry March 10, 2025, 6:39 p.m. UTC
Now that CoW-based atomic writes are supported, update the max size of an
atomic write.

For simplicity, limit at the max of what the mounted bdev can support in
terms of atomic write limits. Maybe in future we will have a better way
to advertise this optimised limit.

In addition, the max atomic write size needs to be aligned to the agsize.
Limit the size of atomic writes to the greatest power-of-two factor of the
agsize so that allocations for an atomic write will always be aligned
compatibly with the alignment requirements of the storage.

For RT inode, just limit to 1x block, even though larger can be supported
in future.

Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 fs/xfs/xfs_iops.c  | 14 +++++++++++++-
 fs/xfs/xfs_mount.c | 28 ++++++++++++++++++++++++++++
 fs/xfs/xfs_mount.h |  1 +
 3 files changed, 42 insertions(+), 1 deletion(-)

Comments

Carlos Maiolino March 11, 2025, 2:40 p.m. UTC | #1
Thanks for updating it John.

Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>

On Mon, Mar 10, 2025 at 06:39:44PM +0000, John Garry wrote:
> Now that CoW-based atomic writes are supported, update the max size of an
> atomic write.
> 
> For simplicity, limit at the max of what the mounted bdev can support in
> terms of atomic write limits. Maybe in future we will have a better way
> to advertise this optimised limit.
> 
> In addition, the max atomic write size needs to be aligned to the agsize.
> Limit the size of atomic writes to the greatest power-of-two factor of the
> agsize so that allocations for an atomic write will always be aligned
> compatibly with the alignment requirements of the storage.
> 
> For RT inode, just limit to 1x block, even though larger can be supported
> in future.
> 
> Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>  fs/xfs/xfs_iops.c  | 14 +++++++++++++-
>  fs/xfs/xfs_mount.c | 28 ++++++++++++++++++++++++++++
>  fs/xfs/xfs_mount.h |  1 +
>  3 files changed, 42 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> index de065cc2e7cf..16a1f9541690 100644
> --- a/fs/xfs/xfs_iops.c
> +++ b/fs/xfs/xfs_iops.c
> @@ -607,12 +607,24 @@ xfs_get_atomic_write_attr(
>  	unsigned int		*unit_min,
>  	unsigned int		*unit_max)
>  {
> +	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
> +	struct xfs_mount	*mp = ip->i_mount;
> +
>  	if (!xfs_inode_can_atomicwrite(ip)) {
>  		*unit_min = *unit_max = 0;
>  		return;
>  	}
> 
> -	*unit_min = *unit_max = ip->i_mount->m_sb.sb_blocksize;
> +	*unit_min = ip->i_mount->m_sb.sb_blocksize;
> +
> +	if (XFS_IS_REALTIME_INODE(ip)) {
> +		/* For now, set limit at 1x block */
> +		*unit_max = ip->i_mount->m_sb.sb_blocksize;
> +	} else {
> +		*unit_max =  min_t(unsigned int,
> +					XFS_FSB_TO_B(mp, mp->m_awu_max),
> +					target->bt_bdev_awu_max);
> +	}
>  }
> 
>  static void
> diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> index e65a659901d5..414adfb944b9 100644
> --- a/fs/xfs/xfs_mount.c
> +++ b/fs/xfs/xfs_mount.c
> @@ -665,6 +665,32 @@ xfs_agbtree_compute_maxlevels(
>  	levels = max(levels, mp->m_rmap_maxlevels);
>  	mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels);
>  }
> +static inline void
> +xfs_compute_awu_max(
> +	struct xfs_mount	*mp)
> +{
> +	xfs_agblock_t		agsize = mp->m_sb.sb_agblocks;
> +	xfs_agblock_t		awu_max;
> +
> +	if (!xfs_has_reflink(mp)) {
> +		mp->m_awu_max = 1;
> +		return;
> +	}
> +
> +	/*
> +	 * Find highest power-of-2 evenly divisible into agsize and which
> +	 * also fits into an unsigned int field.
> +	 */
> +	awu_max = 1;
> +	while (1) {
> +		if (agsize % (awu_max * 2))
> +			break;
> +		if (XFS_FSB_TO_B(mp, awu_max * 2) > UINT_MAX)
> +			break;
> +		awu_max *= 2;
> +	}
> +	mp->m_awu_max = awu_max;
> +}
> 
>  /* Compute maximum possible height for realtime btree types for this fs. */
>  static inline void
> @@ -751,6 +777,8 @@ xfs_mountfs(
>  	xfs_agbtree_compute_maxlevels(mp);
>  	xfs_rtbtree_compute_maxlevels(mp);
> 
> +	xfs_compute_awu_max(mp);
> +
>  	/*
>  	 * Check if sb_agblocks is aligned at stripe boundary.  If sb_agblocks
>  	 * is NOT aligned turn off m_dalign since allocator alignment is within
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index 799b84220ebb..1b0136da2aec 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -229,6 +229,7 @@ typedef struct xfs_mount {
>  	bool			m_finobt_nores; /* no per-AG finobt resv. */
>  	bool			m_update_sb;	/* sb needs update in mount */
>  	unsigned int		m_max_open_zones;
> +	xfs_extlen_t		m_awu_max;	/* data device max atomic write */
> 
>  	/*
>  	 * Bitsets of per-fs metadata that have been checked and/or are sick.
> --
> 2.31.1
>
Christoph Hellwig March 12, 2025, 7:41 a.m. UTC | #2
On Mon, Mar 10, 2025 at 06:39:44PM +0000, John Garry wrote:
> For RT inode, just limit to 1x block, even though larger can be supported
> in future.

Why?

> +	if (XFS_IS_REALTIME_INODE(ip)) {
> +		/* For now, set limit at 1x block */

Why?  It's clearly obvious that you do that from the code, but comments
are supposed to explain why something non-obvious is done.

> +		*unit_max = ip->i_mount->m_sb.sb_blocksize;
> +	} else {
> +		*unit_max =  min_t(unsigned int,

double whitespace before the min.

> +++ b/fs/xfs/xfs_mount.c
> @@ -665,6 +665,32 @@ xfs_agbtree_compute_maxlevels(
>  	levels = max(levels, mp->m_rmap_maxlevels);
>  	mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels);
>  }
> +static inline void

Missing empty line after the previous function.
John Garry March 12, 2025, 8:09 a.m. UTC | #3
On 12/03/2025 07:41, Christoph Hellwig wrote:
> On Mon, Mar 10, 2025 at 06:39:44PM +0000, John Garry wrote:
>> For RT inode, just limit to 1x block, even though larger can be supported
>> in future.
> 
> Why?

Adding RT support adds even more complexity upfront, and RT is 
uncommonly used.

In addition, it will be limited to using power-of-2 rtextsize, so a 
slightly restricted feature.

> 
>> +	if (XFS_IS_REALTIME_INODE(ip)) {
>> +		/* For now, set limit at 1x block */
> 
> Why?  It's clearly obvious that you do that from the code, but comments
> are supposed to explain why something non-obvious is done.

ok

> 
>> +		*unit_max = ip->i_mount->m_sb.sb_blocksize;
>> +	} else {
>> +		*unit_max =  min_t(unsigned int,
> 
> double whitespace before the min.

will fix

> 
>> +++ b/fs/xfs/xfs_mount.c
>> @@ -665,6 +665,32 @@ xfs_agbtree_compute_maxlevels(
>>   	levels = max(levels, mp->m_rmap_maxlevels);
>>   	mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels);
>>   }
>> +static inline void
> 
> Missing empty line after the previous function.

Will fix.

Thanks,
John
Christoph Hellwig March 12, 2025, 8:13 a.m. UTC | #4
On Wed, Mar 12, 2025 at 08:09:47AM +0000, John Garry wrote:
> On 12/03/2025 07:41, Christoph Hellwig wrote:
> > On Mon, Mar 10, 2025 at 06:39:44PM +0000, John Garry wrote:
> > > For RT inode, just limit to 1x block, even though larger can be supported
> > > in future.
> > 
> > Why?
> 
> Adding RT support adds even more complexity upfront, and RT is uncommonly
> used.
> 
> In addition, it will be limited to using power-of-2 rtextsize, so a slightly
> restricted feature.

Please spell that out in the commit message.
John Garry March 12, 2025, 8:14 a.m. UTC | #5
On 12/03/2025 08:13, Christoph Hellwig wrote:
> On Wed, Mar 12, 2025 at 08:09:47AM +0000, John Garry wrote:
>> On 12/03/2025 07:41, Christoph Hellwig wrote:
>>> On Mon, Mar 10, 2025 at 06:39:44PM +0000, John Garry wrote:
>>>> For RT inode, just limit to 1x block, even though larger can be supported
>>>> in future.
>>> Why?
>> Adding RT support adds even more complexity upfront, and RT is uncommonly
>> used.
>>
>> In addition, it will be limited to using power-of-2 rtextsize, so a slightly
>> restricted feature.
> Please spell that out in the commit message.

ok, fine.
diff mbox series

Patch

diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index de065cc2e7cf..16a1f9541690 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -607,12 +607,24 @@  xfs_get_atomic_write_attr(
 	unsigned int		*unit_min,
 	unsigned int		*unit_max)
 {
+	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
+	struct xfs_mount	*mp = ip->i_mount;
+
 	if (!xfs_inode_can_atomicwrite(ip)) {
 		*unit_min = *unit_max = 0;
 		return;
 	}
 
-	*unit_min = *unit_max = ip->i_mount->m_sb.sb_blocksize;
+	*unit_min = ip->i_mount->m_sb.sb_blocksize;
+
+	if (XFS_IS_REALTIME_INODE(ip)) {
+		/* For now, set limit at 1x block */
+		*unit_max = ip->i_mount->m_sb.sb_blocksize;
+	} else {
+		*unit_max =  min_t(unsigned int,
+					XFS_FSB_TO_B(mp, mp->m_awu_max),
+					target->bt_bdev_awu_max);
+	}
 }
 
 static void
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index e65a659901d5..414adfb944b9 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -665,6 +665,32 @@  xfs_agbtree_compute_maxlevels(
 	levels = max(levels, mp->m_rmap_maxlevels);
 	mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels);
 }
+static inline void
+xfs_compute_awu_max(
+	struct xfs_mount	*mp)
+{
+	xfs_agblock_t		agsize = mp->m_sb.sb_agblocks;
+	xfs_agblock_t		awu_max;
+
+	if (!xfs_has_reflink(mp)) {
+		mp->m_awu_max = 1;
+		return;
+	}
+
+	/*
+	 * Find highest power-of-2 evenly divisible into agsize and which
+	 * also fits into an unsigned int field.
+	 */
+	awu_max = 1;
+	while (1) {
+		if (agsize % (awu_max * 2))
+			break;
+		if (XFS_FSB_TO_B(mp, awu_max * 2) > UINT_MAX)
+			break;
+		awu_max *= 2;
+	}
+	mp->m_awu_max = awu_max;
+}
 
 /* Compute maximum possible height for realtime btree types for this fs. */
 static inline void
@@ -751,6 +777,8 @@  xfs_mountfs(
 	xfs_agbtree_compute_maxlevels(mp);
 	xfs_rtbtree_compute_maxlevels(mp);
 
+	xfs_compute_awu_max(mp);
+
 	/*
 	 * Check if sb_agblocks is aligned at stripe boundary.  If sb_agblocks
 	 * is NOT aligned turn off m_dalign since allocator alignment is within
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 799b84220ebb..1b0136da2aec 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -229,6 +229,7 @@  typedef struct xfs_mount {
 	bool			m_finobt_nores; /* no per-AG finobt resv. */
 	bool			m_update_sb;	/* sb needs update in mount */
 	unsigned int		m_max_open_zones;
+	xfs_extlen_t		m_awu_max;	/* data device max atomic write */
 
 	/*
 	 * Bitsets of per-fs metadata that have been checked and/or are sick.