Message ID | 20250310183946.932054-9-john.g.garry@oracle.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | large atomic writes for xfs with CoW | expand |
Thanks for updating it John. Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com> On Mon, Mar 10, 2025 at 06:39:44PM +0000, John Garry wrote: > Now that CoW-based atomic writes are supported, update the max size of an > atomic write. > > For simplicity, limit at the max of what the mounted bdev can support in > terms of atomic write limits. Maybe in future we will have a better way > to advertise this optimised limit. > > In addition, the max atomic write size needs to be aligned to the agsize. > Limit the size of atomic writes to the greatest power-of-two factor of the > agsize so that allocations for an atomic write will always be aligned > compatibly with the alignment requirements of the storage. > > For RT inode, just limit to 1x block, even though larger can be supported > in future. > > Reviewed-by: "Darrick J. Wong" <djwong@kernel.org> > Signed-off-by: John Garry <john.g.garry@oracle.com> > --- > fs/xfs/xfs_iops.c | 14 +++++++++++++- > fs/xfs/xfs_mount.c | 28 ++++++++++++++++++++++++++++ > fs/xfs/xfs_mount.h | 1 + > 3 files changed, 42 insertions(+), 1 deletion(-) > > diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c > index de065cc2e7cf..16a1f9541690 100644 > --- a/fs/xfs/xfs_iops.c > +++ b/fs/xfs/xfs_iops.c > @@ -607,12 +607,24 @@ xfs_get_atomic_write_attr( > unsigned int *unit_min, > unsigned int *unit_max) > { > + struct xfs_buftarg *target = xfs_inode_buftarg(ip); > + struct xfs_mount *mp = ip->i_mount; > + > if (!xfs_inode_can_atomicwrite(ip)) { > *unit_min = *unit_max = 0; > return; > } > > - *unit_min = *unit_max = ip->i_mount->m_sb.sb_blocksize; > + *unit_min = ip->i_mount->m_sb.sb_blocksize; > + > + if (XFS_IS_REALTIME_INODE(ip)) { > + /* For now, set limit at 1x block */ > + *unit_max = ip->i_mount->m_sb.sb_blocksize; > + } else { > + *unit_max = min_t(unsigned int, > + XFS_FSB_TO_B(mp, mp->m_awu_max), > + target->bt_bdev_awu_max); > + } > } > > static void > diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c > index e65a659901d5..414adfb944b9 
100644 > --- a/fs/xfs/xfs_mount.c > +++ b/fs/xfs/xfs_mount.c > @@ -665,6 +665,32 @@ xfs_agbtree_compute_maxlevels( > levels = max(levels, mp->m_rmap_maxlevels); > mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels); > } > +static inline void > +xfs_compute_awu_max( > + struct xfs_mount *mp) > +{ > + xfs_agblock_t agsize = mp->m_sb.sb_agblocks; > + xfs_agblock_t awu_max; > + > + if (!xfs_has_reflink(mp)) { > + mp->m_awu_max = 1; > + return; > + } > + > + /* > + * Find highest power-of-2 evenly divisible into agsize and which > + * also fits into an unsigned int field. > + */ > + awu_max = 1; > + while (1) { > + if (agsize % (awu_max * 2)) > + break; > + if (XFS_FSB_TO_B(mp, awu_max * 2) > UINT_MAX) > + break; > + awu_max *= 2; > + } > + mp->m_awu_max = awu_max; > +} > > /* Compute maximum possible height for realtime btree types for this fs. */ > static inline void > @@ -751,6 +777,8 @@ xfs_mountfs( > xfs_agbtree_compute_maxlevels(mp); > xfs_rtbtree_compute_maxlevels(mp); > > + xfs_compute_awu_max(mp); > + > /* > * Check if sb_agblocks is aligned at stripe boundary. If sb_agblocks > * is NOT aligned turn off m_dalign since allocator alignment is within > diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h > index 799b84220ebb..1b0136da2aec 100644 > --- a/fs/xfs/xfs_mount.h > +++ b/fs/xfs/xfs_mount.h > @@ -229,6 +229,7 @@ typedef struct xfs_mount { > bool m_finobt_nores; /* no per-AG finobt resv. */ > bool m_update_sb; /* sb needs update in mount */ > unsigned int m_max_open_zones; > + xfs_extlen_t m_awu_max; /* data device max atomic write */ > > /* > * Bitsets of per-fs metadata that have been checked and/or are sick. > -- > 2.31.1 >
On Mon, Mar 10, 2025 at 06:39:44PM +0000, John Garry wrote: > For RT inode, just limit to 1x block, even though larger can be supported > in future. Why? > + if (XFS_IS_REALTIME_INODE(ip)) { > + /* For now, set limit at 1x block */ Why? It's clearly obvious that you do that from the code, but comments are supposed to explain why something non-obvious is done. > + *unit_max = ip->i_mount->m_sb.sb_blocksize; > + } else { > + *unit_max = min_t(unsigned int, double whitespace before the min. > +++ b/fs/xfs/xfs_mount.c > @@ -665,6 +665,32 @@ xfs_agbtree_compute_maxlevels( > levels = max(levels, mp->m_rmap_maxlevels); > mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels); > } > +static inline void Missing empty line after the previous function.
On 12/03/2025 07:41, Christoph Hellwig wrote: > On Mon, Mar 10, 2025 at 06:39:44PM +0000, John Garry wrote: >> For RT inode, just limit to 1x block, even though larger can be supported >> in future. > > Why? Adding RT support adds even more complexity upfront, and RT is uncommonly used. In addition, it will be limited to using power-of-2 rtextsize, so a slightly restricted feature. > >> + if (XFS_IS_REALTIME_INODE(ip)) { >> + /* For now, set limit at 1x block */ > > Why? It's clearly obvious that you do that from the code, but comments > are supposed to explain why something non-obvious is done. ok > >> + *unit_max = ip->i_mount->m_sb.sb_blocksize; >> + } else { >> + *unit_max = min_t(unsigned int, > > double whitespace before the min. will fix > >> +++ b/fs/xfs/xfs_mount.c >> @@ -665,6 +665,32 @@ xfs_agbtree_compute_maxlevels( >> levels = max(levels, mp->m_rmap_maxlevels); >> mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels); >> } >> +static inline void > > Missing empty line after the previous function. Will fix. Thanks, John
On Wed, Mar 12, 2025 at 08:09:47AM +0000, John Garry wrote: > On 12/03/2025 07:41, Christoph Hellwig wrote: > > On Mon, Mar 10, 2025 at 06:39:44PM +0000, John Garry wrote: > > > For RT inode, just limit to 1x block, even though larger can be supported > > > in future. > > > > Why? > > Adding RT support adds even more complexity upfront, and RT is uncommonly > used. > > In addition, it will be limited to using power-of-2 rtextsize, so a slightly > restricted feature. Please spell that out in the commit message.
On 12/03/2025 08:13, Christoph Hellwig wrote: > On Wed, Mar 12, 2025 at 08:09:47AM +0000, John Garry wrote: >> On 12/03/2025 07:41, Christoph Hellwig wrote: >>> On Mon, Mar 10, 2025 at 06:39:44PM +0000, John Garry wrote: >>>> For RT inode, just limit to 1x block, even though larger can be supported >>>> in future. >>> Why? >> Adding RT support adds even more complexity upfront, and RT is uncommonly >> used. >> >> In addition, it will be limited to using power-of-2 rtextsize, so a slightly >> restricted feature. > Please spell that out in the commit message. ok, fine.
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index de065cc2e7cf..16a1f9541690 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -607,12 +607,24 @@ xfs_get_atomic_write_attr( unsigned int *unit_min, unsigned int *unit_max) { + struct xfs_buftarg *target = xfs_inode_buftarg(ip); + struct xfs_mount *mp = ip->i_mount; + if (!xfs_inode_can_atomicwrite(ip)) { *unit_min = *unit_max = 0; return; } - *unit_min = *unit_max = ip->i_mount->m_sb.sb_blocksize; + *unit_min = ip->i_mount->m_sb.sb_blocksize; + + if (XFS_IS_REALTIME_INODE(ip)) { + /* For now, set limit at 1x block */ + *unit_max = ip->i_mount->m_sb.sb_blocksize; + } else { + *unit_max = min_t(unsigned int, + XFS_FSB_TO_B(mp, mp->m_awu_max), + target->bt_bdev_awu_max); + } } static void diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index e65a659901d5..414adfb944b9 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -665,6 +665,32 @@ xfs_agbtree_compute_maxlevels( levels = max(levels, mp->m_rmap_maxlevels); mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels); } +static inline void +xfs_compute_awu_max( + struct xfs_mount *mp) +{ + xfs_agblock_t agsize = mp->m_sb.sb_agblocks; + xfs_agblock_t awu_max; + + if (!xfs_has_reflink(mp)) { + mp->m_awu_max = 1; + return; + } + + /* + * Find highest power-of-2 evenly divisible into agsize and which + * also fits into an unsigned int field. + */ + awu_max = 1; + while (1) { + if (agsize % (awu_max * 2)) + break; + if (XFS_FSB_TO_B(mp, awu_max * 2) > UINT_MAX) + break; + awu_max *= 2; + } + mp->m_awu_max = awu_max; +} /* Compute maximum possible height for realtime btree types for this fs. */ static inline void @@ -751,6 +777,8 @@ xfs_mountfs( xfs_agbtree_compute_maxlevels(mp); xfs_rtbtree_compute_maxlevels(mp); + xfs_compute_awu_max(mp); + /* * Check if sb_agblocks is aligned at stripe boundary. 
If sb_agblocks * is NOT aligned turn off m_dalign since allocator alignment is within diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 799b84220ebb..1b0136da2aec 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -229,6 +229,7 @@ typedef struct xfs_mount { bool m_finobt_nores; /* no per-AG finobt resv. */ bool m_update_sb; /* sb needs update in mount */ unsigned int m_max_open_zones; + xfs_extlen_t m_awu_max; /* data device max atomic write */ /* * Bitsets of per-fs metadata that have been checked and/or are sick.