diff mbox series

[v4,11/12] xfs: Update atomic write max size

Message ID 20250303171120.2837067-12-john.g.garry@oracle.com (mailing list archive)
State New
Headers show
Series large atomic writes for xfs with CoW | expand

Commit Message

John Garry March 3, 2025, 5:11 p.m. UTC
Now that CoW-based atomic writes are supported, update the max size of an
atomic write.

For simplicity, limit at the max of what the mounted bdev can support in
terms of atomic write limits. Maybe in future we will have a better way
to advertise this optimised limit.

In addition, the max atomic write size needs to be aligned to the agsize.
Limit the size of atomic writes to the greatest power-of-two factor of the
agsize so that allocations for an atomic write will always be aligned
compatibly with the alignment requirements of the storage.

For RT inode, just limit to 1x block, even though larger can be supported
in future.

Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 fs/xfs/xfs_iops.c  | 13 ++++++++++++-
 fs/xfs/xfs_mount.c | 28 ++++++++++++++++++++++++++++
 fs/xfs/xfs_mount.h |  1 +
 3 files changed, 41 insertions(+), 1 deletion(-)

Comments

Carlos Maiolino March 10, 2025, 10:06 a.m. UTC | #1
Hi John.

On Mon, Mar 03, 2025 at 05:11:19PM +0000, John Garry wrote:
> Now that CoW-based atomic writes are supported, update the max size of an
> atomic write.
> 
> For simplicity, limit at the max of what the mounted bdev can support in
> terms of atomic write limits. Maybe in future we will have a better way
> to advertise this optimised limit.
> 
> In addition, the max atomic write size needs to be aligned to the agsize.
> Limit the size of atomic writes to the greatest power-of-two factor of the
> agsize so that allocations for an atomic write will always be aligned
> compatibly with the alignment requirements of the storage.
> 
> For RT inode, just limit to 1x block, even though larger can be supported
> in future.
> 
> Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>  fs/xfs/xfs_iops.c  | 13 ++++++++++++-
>  fs/xfs/xfs_mount.c | 28 ++++++++++++++++++++++++++++
>  fs/xfs/xfs_mount.h |  1 +
>  3 files changed, 41 insertions(+), 1 deletion(-)
> 

> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index fbed172d6770..bc96b8214173 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -198,6 +198,7 @@ typedef struct xfs_mount {
>  	bool			m_fail_unmount;
>  	bool			m_finobt_nores; /* no per-AG finobt resv. */
>  	bool			m_update_sb;	/* sb needs update in mount */
> +	xfs_extlen_t		awu_max;	/* data device max atomic write */

Could you please rename this to something else? All fields within xfs_mount
follows the same pattern m_<name>. Perhaps m_awu_max?

I was going to send a patch replacing it once I had this merged, but giving
Dave's new comments, and the conflicts with zoned devices, you'll need to send a
V5, so, please include this change if nobody else has any objections on keeping
the xfs_mount naming convention.

Carlos.

> 
>  	/*
>  	 * Bitsets of per-fs metadata that have been checked and/or are sick.
> --
> 2.31.1
>
John Garry March 10, 2025, 10:54 a.m. UTC | #2
On 10/03/2025 10:06, Carlos Maiolino wrote:
>> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
>> index fbed172d6770..bc96b8214173 100644
>> --- a/fs/xfs/xfs_mount.h
>> +++ b/fs/xfs/xfs_mount.h
>> @@ -198,6 +198,7 @@ typedef struct xfs_mount {
>>   	bool			m_fail_unmount;
>>   	bool			m_finobt_nores; /* no per-AG finobt resv. */
>>   	bool			m_update_sb;	/* sb needs update in mount */
>> +	xfs_extlen_t		awu_max;	/* data device max atomic write */
> Could you please rename this to something else? All fields within xfs_mount
> follows the same pattern m_<name>. Perhaps m_awu_max?

Fine, but I think I then need to deal with spilling multiple lines to 
accommodate a proper comment.

> 
> I was going to send a patch replacing it once I had this merged, but giving
> Dave's new comments, and the conflicts with zoned devices, you'll need to send a
> V5, so, please include this change if nobody else has any objections on keeping
> the xfs_mount naming convention.

What branch do you want me to send this against?

Thanks,
John
Carlos Maiolino March 10, 2025, 11:11 a.m. UTC | #3
On Mon, Mar 10, 2025 at 10:54:23AM +0000, John Garry wrote:
> On 10/03/2025 10:06, Carlos Maiolino wrote:
> >> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> >> index fbed172d6770..bc96b8214173 100644
> >> --- a/fs/xfs/xfs_mount.h
> >> +++ b/fs/xfs/xfs_mount.h
> >> @@ -198,6 +198,7 @@ typedef struct xfs_mount {
> >>   	bool			m_fail_unmount;
> >>   	bool			m_finobt_nores; /* no per-AG finobt resv. */
> >>   	bool			m_update_sb;	/* sb needs update in mount */
> >> +	xfs_extlen_t		awu_max;	/* data device max atomic write */
> > Could you please rename this to something else? All fields within xfs_mount
> > follows the same pattern m_<name>. Perhaps m_awu_max?
> 
> Fine, but I think I then need to deal with spilling multiple lines to
> accommodate a proper comment.
> 
> >
> > I was going to send a patch replacing it once I had this merged, but giving
> > Dave's new comments, and the conflicts with zoned devices, you'll need to send a
> > V5, so, please include this change if nobody else has any objections on keeping
> > the xfs_mount naming convention.
> 
> What branch do you want me to send this against?

I just pushed everything to for-next, so you can just rebase it against for-next

Notice this includes the iomap patches you sent in this series which Christian
picked up. So if you need to re-work something on the iomap patches, you'll
probably need to take this into account.

Cheers.
Carlos

> 
> Thanks,
> John
>
John Garry March 10, 2025, 11:20 a.m. UTC | #4
On 10/03/2025 11:11, Carlos Maiolino wrote:
> On Mon, Mar 10, 2025 at 10:54:23AM +0000, John Garry wrote:
>> On 10/03/2025 10:06, Carlos Maiolino wrote:
>>>> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
>>>> index fbed172d6770..bc96b8214173 100644
>>>> --- a/fs/xfs/xfs_mount.h
>>>> +++ b/fs/xfs/xfs_mount.h
>>>> @@ -198,6 +198,7 @@ typedef struct xfs_mount {
>>>>    	bool			m_fail_unmount;
>>>>    	bool			m_finobt_nores; /* no per-AG finobt resv. */
>>>>    	bool			m_update_sb;	/* sb needs update in mount */
>>>> +	xfs_extlen_t		awu_max;	/* data device max atomic write */
>>> Could you please rename this to something else? All fields within xfs_mount
>>> follows the same pattern m_<name>. Perhaps m_awu_max?
>> Fine, but I think I then need to deal with spilling multiple lines to
>> accommodate a proper comment.
>>
>>> I was going to send a patch replacing it once I had this merged, but giving
>>> Dave's new comments, and the conflicts with zoned devices, you'll need to send a
>>> V5, so, please include this change if nobody else has any objections on keeping
>>> the xfs_mount naming convention.
>> What branch do you want me to send this against?
> I just pushed everything to for-next, so you can just rebase it against for-next
> 
> Notice this includes the iomap patches you sent in this series which Christian
> picked up. So if you need to re-work something on the iomap patches, you'll
> probably need to take this into account.

Your branch includes the iomap changes, so hard to deal with.

For the iomap change, Dave was suggesting a name change only, so not a 
major issue.

So if we really want to go with a name change, then I could add a patch 
to change the name only and include in the v5.

Review comments are always welcome, but I wish that they did not come so 
late...

Thanks,
John
Carlos Maiolino March 10, 2025, 12:38 p.m. UTC | #5
On Mon, Mar 10, 2025 at 11:20:23AM +0000, John Garry wrote:
> On 10/03/2025 11:11, Carlos Maiolino wrote:
> > On Mon, Mar 10, 2025 at 10:54:23AM +0000, John Garry wrote:
> >> On 10/03/2025 10:06, Carlos Maiolino wrote:
> >>>> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> >>>> index fbed172d6770..bc96b8214173 100644
> >>>> --- a/fs/xfs/xfs_mount.h
> >>>> +++ b/fs/xfs/xfs_mount.h
> >>>> @@ -198,6 +198,7 @@ typedef struct xfs_mount {
> >>>>    	bool			m_fail_unmount;
> >>>>    	bool			m_finobt_nores; /* no per-AG finobt resv. */
> >>>>    	bool			m_update_sb;	/* sb needs update in mount */
> >>>> +	xfs_extlen_t		awu_max;	/* data device max atomic write */
> >>> Could you please rename this to something else? All fields within xfs_mount
> >>> follows the same pattern m_<name>. Perhaps m_awu_max?
> >> Fine, but I think I then need to deal with spilling multiple lines to
> >> accommodate a proper comment.
> >>
> >>> I was going to send a patch replacing it once I had this merged, but giving
> >>> Dave's new comments, and the conflicts with zoned devices, you'll need to send a
> >>> V5, so, please include this change if nobody else has any objections on keeping
> >>> the xfs_mount naming convention.
> >> What branch do you want me to send this against?
> > I just pushed everything to for-next, so you can just rebase it against for-next
> >
> > Notice this includes the iomap patches you sent in this series which Christian
> > picked up. So if you need to re-work something on the iomap patches, you'll
> > probably need to take this into account.
> 
> Your branch includes the iomap changes, so hard to deal with.

> For the iomap change, Dave was suggesting a name change only, so not a
> major issue.

If you don't plan to change anything related to the iomap (depending on the path
the discussion on path 5/12 takes), I believe all you need to do is remove the
iomap patches from your branch, sending only the xfs patches.

> So if we really want to go with a name change, then I could add a patch
> to change the name only and include in the v5.
> 
> Review comments are always welcome, but I wish that they did not come so
> late...

That's why I didn't bother asking you to change xfs_mount until now, I'd do it
myself if you weren't going to send a V5.
But Dave's comments are more than a mere naming convention, but logic
adjusting due to operator precedence.

Carlos

> 
> Thanks,
> John
John Garry March 10, 2025, 12:59 p.m. UTC | #6
On 10/03/2025 12:38, Carlos Maiolino wrote:
> On Mon, Mar 10, 2025 at 11:20:23AM +0000, John Garry wrote:
>> On 10/03/2025 11:11, Carlos Maiolino wrote:
>>> On Mon, Mar 10, 2025 at 10:54:23AM +0000, John Garry wrote:
>>>> On 10/03/2025 10:06, Carlos Maiolino wrote:
>>>>>> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
>>>>>> index fbed172d6770..bc96b8214173 100644
>>>>>> --- a/fs/xfs/xfs_mount.h
>>>>>> +++ b/fs/xfs/xfs_mount.h
>>>>>> @@ -198,6 +198,7 @@ typedef struct xfs_mount {
>>>>>>     	bool			m_fail_unmount;
>>>>>>     	bool			m_finobt_nores; /* no per-AG finobt resv. */
>>>>>>     	bool			m_update_sb;	/* sb needs update in mount */
>>>>>> +	xfs_extlen_t		awu_max;	/* data device max atomic write */
>>>>> Could you please rename this to something else? All fields within xfs_mount
>>>>> follows the same pattern m_<name>. Perhaps m_awu_max?
>>>> Fine, but I think I then need to deal with spilling multiple lines to
>>>> accommodate a proper comment.
>>>>
>>>>> I was going to send a patch replacing it once I had this merged, but giving
>>>>> Dave's new comments, and the conflicts with zoned devices, you'll need to send a
>>>>> V5, so, please include this change if nobody else has any objections on keeping
>>>>> the xfs_mount naming convention.
>>>> What branch do you want me to send this against?
>>> I just pushed everything to for-next, so you can just rebase it against for-next
>>>
>>> Notice this includes the iomap patches you sent in this series which Christian
>>> picked up. So if you need to re-work something on the iomap patches, you'll
>>> probably need to take this into account.
>>
>> Your branch includes the iomap changes, so hard to deal with.
> 
>> For the iomap change, Dave was suggesting a name change only, so not a
>> major issue.
> 
> If you don't plan to change anything related to the iomap (depending on the path
> the discussion on path 5/12 takes), I believe all you need to do is remove the
> iomap patches from your branch, sending only the xfs patches.

Right

> 
>> So if we really want to go with a name change, then I could add a patch
>> to change the name only and include in the v5.
>>
>> Review comments are always welcome, but I wish that they did not come so
>> late...
> 
> That's why I didn't bother asking you to change xfs_mount until now, I'd do it
> myself if you weren't going to send a V5.
> But Dave's comments are more than a mere naming convention, but logic
> adjusting due to operator precedence.
> 

ok, working on that now.

Cheers,
John
diff mbox series

Patch

diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ea79fb246e33..d0a537696514 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -606,12 +606,23 @@  xfs_get_atomic_write_attr(
 	unsigned int		*unit_min,
 	unsigned int		*unit_max)
 {
+	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
+	struct xfs_mount	*mp = ip->i_mount;
+
 	if (!xfs_inode_can_atomicwrite(ip)) {
 		*unit_min = *unit_max = 0;
 		return;
 	}
 
-	*unit_min = *unit_max = ip->i_mount->m_sb.sb_blocksize;
+	*unit_min = ip->i_mount->m_sb.sb_blocksize;
+
+	if (XFS_IS_REALTIME_INODE(ip)) {
+		/* For now, set limit at 1x block */
+		*unit_max = ip->i_mount->m_sb.sb_blocksize;
+	} else {
+		*unit_max =  min_t(unsigned int, XFS_FSB_TO_B(mp, mp->awu_max),
+					target->bt_bdev_awu_max);
+	}
 }
 
 static void
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b69356582b86..c6fabf7ac506 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -648,6 +648,32 @@  xfs_agbtree_compute_maxlevels(
 	levels = max(levels, mp->m_rmap_maxlevels);
 	mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels);
 }
+static inline void
+xfs_compute_awu_max(
+	struct xfs_mount	*mp)
+{
+	xfs_agblock_t		agsize = mp->m_sb.sb_agblocks;
+	xfs_agblock_t		awu_max;
+
+	if (!xfs_has_reflink(mp)) {
+		mp->awu_max = 1;
+		return;
+	}
+
+	/*
+	 * Find highest power-of-2 evenly divisible into agsize and which
+	 * also fits into an unsigned int field.
+	 */
+	awu_max = 1;
+	while (1) {
+		if (agsize % (awu_max * 2))
+			break;
+		if (XFS_FSB_TO_B(mp, awu_max * 2) > UINT_MAX)
+			break;
+		awu_max *= 2;
+	}
+	mp->awu_max = awu_max;
+}
 
 /* Compute maximum possible height for realtime btree types for this fs. */
 static inline void
@@ -733,6 +759,8 @@  xfs_mountfs(
 	xfs_agbtree_compute_maxlevels(mp);
 	xfs_rtbtree_compute_maxlevels(mp);
 
+	xfs_compute_awu_max(mp);
+
 	/*
 	 * Check if sb_agblocks is aligned at stripe boundary.  If sb_agblocks
 	 * is NOT aligned turn off m_dalign since allocator alignment is within
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index fbed172d6770..bc96b8214173 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -198,6 +198,7 @@  typedef struct xfs_mount {
 	bool			m_fail_unmount;
 	bool			m_finobt_nores; /* no per-AG finobt resv. */
 	bool			m_update_sb;	/* sb needs update in mount */
+	xfs_extlen_t		awu_max;	/* data device max atomic write */
 
 	/*
 	 * Bitsets of per-fs metadata that have been checked and/or are sick.