Message ID | 20250213135619.1148432-12-john.g.garry@oracle.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | large atomic writes for xfs with CoW | expand |
On Thu, Feb 13, 2025 at 01:56:19PM +0000, John Garry wrote: > When issuing an atomic write by the CoW method, give the block allocator a > hint to naturally align the data blocks. > > This means that we have a better chance to issuing the atomic write via > HW offload next time. > > Signed-off-by: John Garry <john.g.garry@oracle.com> > --- > fs/xfs/libxfs/xfs_bmap.c | 7 ++++++- > fs/xfs/libxfs/xfs_bmap.h | 6 +++++- > fs/xfs/xfs_reflink.c | 8 ++++++-- > 3 files changed, 17 insertions(+), 4 deletions(-) > > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c > index 0ef19f1469ec..9bfdfb7cdcae 100644 > --- a/fs/xfs/libxfs/xfs_bmap.c > +++ b/fs/xfs/libxfs/xfs_bmap.c > @@ -3454,6 +3454,12 @@ xfs_bmap_compute_alignments( > align = xfs_get_cowextsz_hint(ap->ip); > else if (ap->datatype & XFS_ALLOC_USERDATA) > align = xfs_get_extsz_hint(ap->ip); > + > + if (align > 1 && ap->flags & XFS_BMAPI_EXTSZALIGN) > + args->alignment = align; > + else > + args->alignment = 1; > + > if (align) { > if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0, > ap->eof, 0, ap->conv, &ap->offset, > @@ -3782,7 +3788,6 @@ xfs_bmap_btalloc( > .wasdel = ap->wasdel, > .resv = XFS_AG_RESV_NONE, > .datatype = ap->datatype, > - .alignment = 1, > .minalignslop = 0, > }; > xfs_fileoff_t orig_offset; > diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h > index 4b721d935994..7a31697331dc 100644 > --- a/fs/xfs/libxfs/xfs_bmap.h > +++ b/fs/xfs/libxfs/xfs_bmap.h > @@ -87,6 +87,9 @@ struct xfs_bmalloca { > /* Do not update the rmap btree. Used for reconstructing bmbt from rmapbt. */ > #define XFS_BMAPI_NORMAP (1u << 10) > > +/* Try to naturally align allocations to extsz hint */ > +#define XFS_BMAPI_EXTSZALIGN (1u << 11) IMO "naturally" makes things confusing here -- are we aligning to the extent size hint, or are we aligning to the length requested? Or whatever it is that "naturally" means. (FWIW you and I have bumped over this repeatedly, so maybe this is simple one of those cognitive friction things where block storage always deals with powers of two and "naturally" means a lot, vs. filesystems where we don't usually enforce alignment anywhere.) I suggest "Try to align allocations to the extent size hint" for the comment, and with that: Reviewed-by: "Darrick J. Wong" <djwong@kernel.org> --D > + > #define XFS_BMAPI_FLAGS \ > { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ > { XFS_BMAPI_METADATA, "METADATA" }, \ > @@ -98,7 +101,8 @@ struct xfs_bmalloca { > { XFS_BMAPI_REMAP, "REMAP" }, \ > { XFS_BMAPI_COWFORK, "COWFORK" }, \ > { XFS_BMAPI_NODISCARD, "NODISCARD" }, \ > - { XFS_BMAPI_NORMAP, "NORMAP" } > + { XFS_BMAPI_NORMAP, "NORMAP" },\ > + { XFS_BMAPI_EXTSZALIGN, "EXTSZALIGN" } > > > static inline int xfs_bmapi_aflag(int w) > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c > index d097d33dc000..033dff86ecf0 100644 > --- a/fs/xfs/xfs_reflink.c > +++ b/fs/xfs/xfs_reflink.c > @@ -445,6 +445,11 @@ xfs_reflink_fill_cow_hole( > int nimaps; > int error; > bool found; > + uint32_t bmapi_flags = XFS_BMAPI_COWFORK | > + XFS_BMAPI_PREALLOC; > + > + if (atomic) > + bmapi_flags |= XFS_BMAPI_EXTSZALIGN; > > resaligned = xfs_aligned_fsb_count(imap->br_startoff, > imap->br_blockcount, xfs_get_cowextsz_hint(ip)); > @@ -478,8 +483,7 @@ xfs_reflink_fill_cow_hole( > /* Allocate the entire reservation as unwritten blocks. */ > nimaps = 1; > error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, > - XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0, cmap, > - &nimaps); > + bmapi_flags, 0, cmap, &nimaps); > if (error) > goto out_trans_cancel; > > -- > 2.31.1 > >
On 24/02/2025 20:37, Darrick J. Wong wrote: > On Thu, Feb 13, 2025 at 01:56:19PM +0000, John Garry wrote: >> When issuing an atomic write by the CoW method, give the block allocator a >> hint to naturally align the data blocks. >> >> This means that we have a better chance to issuing the atomic write via >> HW offload next time. >> >> Signed-off-by: John Garry <john.g.garry@oracle.com> >> --- >> fs/xfs/libxfs/xfs_bmap.c | 7 ++++++- >> fs/xfs/libxfs/xfs_bmap.h | 6 +++++- >> fs/xfs/xfs_reflink.c | 8 ++++++-- >> 3 files changed, 17 insertions(+), 4 deletions(-) >> >> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c >> index 0ef19f1469ec..9bfdfb7cdcae 100644 >> --- a/fs/xfs/libxfs/xfs_bmap.c >> +++ b/fs/xfs/libxfs/xfs_bmap.c >> @@ -3454,6 +3454,12 @@ xfs_bmap_compute_alignments( >> align = xfs_get_cowextsz_hint(ap->ip); >> else if (ap->datatype & XFS_ALLOC_USERDATA) >> align = xfs_get_extsz_hint(ap->ip); >> + >> + if (align > 1 && ap->flags & XFS_BMAPI_EXTSZALIGN) >> + args->alignment = align; >> + else >> + args->alignment = 1; >> + >> if (align) { >> if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0, >> ap->eof, 0, ap->conv, &ap->offset, >> @@ -3782,7 +3788,6 @@ xfs_bmap_btalloc( >> .wasdel = ap->wasdel, >> .resv = XFS_AG_RESV_NONE, >> .datatype = ap->datatype, >> - .alignment = 1, >> .minalignslop = 0, >> }; >> xfs_fileoff_t orig_offset; >> diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h >> index 4b721d935994..7a31697331dc 100644 >> --- a/fs/xfs/libxfs/xfs_bmap.h >> +++ b/fs/xfs/libxfs/xfs_bmap.h >> @@ -87,6 +87,9 @@ struct xfs_bmalloca { >> /* Do not update the rmap btree. Used for reconstructing bmbt from rmapbt. */ >> #define XFS_BMAPI_NORMAP (1u << 10) >> >> +/* Try to naturally align allocations to extsz hint */ >> +#define XFS_BMAPI_EXTSZALIGN (1u << 11) > > IMO "naturally" makes things confusing here -- are we aligning to the > extent size hint, or are we aligning to the length requested? Or > whatever it is that "naturally" means. We align to extsz hint, not length. As for use of word "naturally", I'll try to avoid using that word. > > (FWIW you and I have bumped over this repeatedly, so maybe this is > simple one of those cognitive friction things where block storage always > deals with powers of two and "naturally" means a lot, vs. filesystems > where we don't usually enforce alignment anywhere.) > > I suggest "Try to align allocations to the extent size hint" for the > comment, and with that: that's fine > Reviewed-by: "Darrick J. Wong" <djwong@kernel.org> > cheers, John
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 0ef19f1469ec..9bfdfb7cdcae 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3454,6 +3454,12 @@ xfs_bmap_compute_alignments( align = xfs_get_cowextsz_hint(ap->ip); else if (ap->datatype & XFS_ALLOC_USERDATA) align = xfs_get_extsz_hint(ap->ip); + + if (align > 1 && ap->flags & XFS_BMAPI_EXTSZALIGN) + args->alignment = align; + else + args->alignment = 1; + if (align) { if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0, ap->eof, 0, ap->conv, &ap->offset, @@ -3782,7 +3788,6 @@ xfs_bmap_btalloc( .wasdel = ap->wasdel, .resv = XFS_AG_RESV_NONE, .datatype = ap->datatype, - .alignment = 1, .minalignslop = 0, }; xfs_fileoff_t orig_offset; diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 4b721d935994..7a31697331dc 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -87,6 +87,9 @@ struct xfs_bmalloca { /* Do not update the rmap btree. Used for reconstructing bmbt from rmapbt. */ #define XFS_BMAPI_NORMAP (1u << 10) +/* Try to naturally align allocations to extsz hint */ +#define XFS_BMAPI_EXTSZALIGN (1u << 11) + #define XFS_BMAPI_FLAGS \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \ @@ -98,7 +101,8 @@ struct xfs_bmalloca { { XFS_BMAPI_REMAP, "REMAP" }, \ { XFS_BMAPI_COWFORK, "COWFORK" }, \ { XFS_BMAPI_NODISCARD, "NODISCARD" }, \ - { XFS_BMAPI_NORMAP, "NORMAP" } + { XFS_BMAPI_NORMAP, "NORMAP" },\ + { XFS_BMAPI_EXTSZALIGN, "EXTSZALIGN" } static inline int xfs_bmapi_aflag(int w) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index d097d33dc000..033dff86ecf0 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -445,6 +445,11 @@ xfs_reflink_fill_cow_hole( int nimaps; int error; bool found; + uint32_t bmapi_flags = XFS_BMAPI_COWFORK | + XFS_BMAPI_PREALLOC; + + if (atomic) + bmapi_flags |= XFS_BMAPI_EXTSZALIGN; resaligned = xfs_aligned_fsb_count(imap->br_startoff, imap->br_blockcount, xfs_get_cowextsz_hint(ip)); @@ -478,8 +483,7 @@ xfs_reflink_fill_cow_hole( /* Allocate the entire reservation as unwritten blocks. */ nimaps = 1; error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, - XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0, cmap, - &nimaps); + bmapi_flags, 0, cmap, &nimaps); if (error) goto out_trans_cancel;
When issuing an atomic write by the CoW method, give the block allocator a hint to naturally align the data blocks. This means that we have a better chance to issuing the atomic write via HW offload next time. Signed-off-by: John Garry <john.g.garry@oracle.com> --- fs/xfs/libxfs/xfs_bmap.c | 7 ++++++- fs/xfs/libxfs/xfs_bmap.h | 6 +++++- fs/xfs/xfs_reflink.c | 8 ++++++-- 3 files changed, 17 insertions(+), 4 deletions(-)