Message ID | 20240930125438.2501050-6-john.g.garry@oracle.com (mailing list archive) |
---|---|
State | Not Applicable, archived |
Headers | show |
Series | block atomic writes for xfs | expand |
On Mon, Sep 30, 2024 at 12:54:36PM +0000, John Garry wrote: > Support providing info on atomic write unit min and max for an inode. > > For simplicity, currently we limit the min at the FS block size. As for > max, we limit also at FS block size, as there is no current method to > guarantee extent alignment or granularity for regular files. > > Signed-off-by: John Garry <john.g.garry@oracle.com> > --- > fs/xfs/xfs_inode.h | 17 +++++++++++++++++ > fs/xfs/xfs_iops.c | 24 ++++++++++++++++++++++++ > 2 files changed, 41 insertions(+) > > diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h > index 1c62ee294a5a..1ea73402d592 100644 > --- a/fs/xfs/xfs_inode.h > +++ b/fs/xfs/xfs_inode.h > @@ -332,6 +332,23 @@ static inline bool xfs_inode_has_atomicwrites(struct xfs_inode *ip) > return ip->i_diflags2 & XFS_DIFLAG2_ATOMICWRITES; > } > > +static inline bool > +xfs_inode_can_atomicwrite( > + struct xfs_inode *ip) > +{ > + struct xfs_mount *mp = ip->i_mount; > + struct xfs_buftarg *target = xfs_inode_buftarg(ip); > + > + if (!xfs_inode_has_atomicwrites(ip)) > + return false; > + if (mp->m_sb.sb_blocksize < target->bt_bdev_awu_min) > + return false; > + if (mp->m_sb.sb_blocksize > target->bt_bdev_awu_max) > + return false; > + > + return true; > +} > + > /* > * In-core inode flags. > */ > diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c > index ee79cf161312..915d057db9bb 100644 > --- a/fs/xfs/xfs_iops.c > +++ b/fs/xfs/xfs_iops.c > @@ -570,6 +570,23 @@ xfs_stat_blksize( > return max_t(uint32_t, PAGE_SIZE, mp->m_sb.sb_blocksize); > } > > +static void > +xfs_get_atomic_write_attr( > + struct xfs_inode *ip, > + unsigned int *unit_min, > + unsigned int *unit_max) > +{ > + struct xfs_mount *mp = ip->i_mount; > + struct xfs_sb *sbp = &mp->m_sb; > + > + if (!xfs_inode_can_atomicwrite(ip)) { > + *unit_min = *unit_max = 0; > + return; > + } > + > + *unit_min = *unit_max = sbp->sb_blocksize; Ok, so we're only supporting untorn writes if they're exactly the fs blocksize, and 1 fsblock is between awu_min/max. That simplifies a lot of things. :) Not supporting sub-fsblock atomic writes means that we'll never hit the directio COW fallback code, which uses the pagecache. Not supporting multi-fsblock atomic writes means that you don't have to figure out how to ensure that we always do cow on forcealign granularity. Though as I pointed out elsewhere in this thread, that's a forcealign problem. Yay! ;) > +} > + > STATIC int > xfs_vn_getattr( > struct mnt_idmap *idmap, > @@ -643,6 +660,13 @@ xfs_vn_getattr( > stat->dio_mem_align = bdev_dma_alignment(bdev) + 1; > stat->dio_offset_align = bdev_logical_block_size(bdev); > } > + if (request_mask & STATX_WRITE_ATOMIC) { > + unsigned int unit_min, unit_max; > + > + xfs_get_atomic_write_attr(ip, &unit_min, &unit_max); > + generic_fill_statx_atomic_writes(stat, > + unit_min, unit_max); Consistent indenting and wrapping, please: xfs_get_atomic_write_attr(ip, &unit_min, &unit_max); generic_fill_statx_atomic_writes(stat, unit_min, unit_max); With that fixed, Reviewed-by: Darrick J. Wong <djwong@kernel.org> --D > + } > fallthrough; > default: > stat->blksize = xfs_stat_blksize(ip); > -- > 2.31.1 > >
On 30/09/2024 17:37, Darrick J. Wong wrote: > On Mon, Sep 30, 2024 at 12:54:36PM +0000, John Garry wrote: >> Support providing info on atomic write unit min and max for an inode. >> >> For simplicity, currently we limit the min at the FS block size. As for >> max, we limit also at FS block size, as there is no current method to >> guarantee extent alignment or granularity for regular files. >> >> Signed-off-by: John Garry <john.g.garry@oracle.com> >> --- >> fs/xfs/xfs_inode.h | 17 +++++++++++++++++ >> fs/xfs/xfs_iops.c | 24 ++++++++++++++++++++++++ >> 2 files changed, 41 insertions(+) >> >> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h >> index 1c62ee294a5a..1ea73402d592 100644 >> --- a/fs/xfs/xfs_inode.h >> +++ b/fs/xfs/xfs_inode.h >> @@ -332,6 +332,23 @@ static inline bool xfs_inode_has_atomicwrites(struct xfs_inode *ip) >> return ip->i_diflags2 & XFS_DIFLAG2_ATOMICWRITES; >> } >> >> +static inline bool >> +xfs_inode_can_atomicwrite( >> + struct xfs_inode *ip) >> +{ >> + struct xfs_mount *mp = ip->i_mount; >> + struct xfs_buftarg *target = xfs_inode_buftarg(ip); >> + >> + if (!xfs_inode_has_atomicwrites(ip)) >> + return false; >> + if (mp->m_sb.sb_blocksize < target->bt_bdev_awu_min) >> + return false; >> + if (mp->m_sb.sb_blocksize > target->bt_bdev_awu_max) >> + return false; >> + >> + return true; >> +} >> + >> /* >> * In-core inode flags. >> */ >> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c >> index ee79cf161312..915d057db9bb 100644 >> --- a/fs/xfs/xfs_iops.c >> +++ b/fs/xfs/xfs_iops.c >> @@ -570,6 +570,23 @@ xfs_stat_blksize( >> return max_t(uint32_t, PAGE_SIZE, mp->m_sb.sb_blocksize); >> } >> >> +static void >> +xfs_get_atomic_write_attr( >> + struct xfs_inode *ip, >> + unsigned int *unit_min, >> + unsigned int *unit_max) >> +{ >> + struct xfs_mount *mp = ip->i_mount; >> + struct xfs_sb *sbp = &mp->m_sb; >> + >> + if (!xfs_inode_can_atomicwrite(ip)) { >> + *unit_min = *unit_max = 0; >> + return; >> + } >> + >> + *unit_min = *unit_max = sbp->sb_blocksize; > > Ok, so we're only supporting untorn writes if they're exactly the fs > blocksize, and 1 fsblock is between awu_min/max. That simplifies a lot > of things. :) > > Not supporting sub-fsblock atomic writes means that we'll never hit the > directio COW fallback code, which uses the pagecache. My original idea (with forcealign) was to support 1FSB and larger. I suppose that with a larger FS block size we might want to support sub-fsblock atomic writes. However, for the moment, I don't see a need to support this. > > Not supporting multi-fsblock atomic writes means that you don't have to > figure out how to ensure that we always do cow on forcealign > granularity. Though as I pointed out elsewhere in this thread, that's a > forcealign problem. Sure > > Yay! ;) > >> +} >> + >> STATIC int >> xfs_vn_getattr( >> struct mnt_idmap *idmap, >> @@ -643,6 +660,13 @@ xfs_vn_getattr( >> stat->dio_mem_align = bdev_dma_alignment(bdev) + 1; >> stat->dio_offset_align = bdev_logical_block_size(bdev); >> } >> + if (request_mask & STATX_WRITE_ATOMIC) { >> + unsigned int unit_min, unit_max; >> + >> + xfs_get_atomic_write_attr(ip, &unit_min, &unit_max); >> + generic_fill_statx_atomic_writes(stat, >> + unit_min, unit_max); > > Consistent indenting and wrapping, please: ok > > xfs_get_atomic_write_attr(ip, &unit_min, > &unit_max); > generic_fill_statx_atomic_writes(stat, > unit_min, unit_max); > > > With that fixed, > Reviewed-by: Darrick J. Wong <djwong@kernel.org> ok, thanks!
On Mon, Sep 30, 2024 at 09:37:16AM -0700, Darrick J. Wong wrote: > Ok, so we're only supporting untorn writes if they're exactly the fs > blocksize, and 1 fsblock is between awu_min/max. That simplifies a lot > of things. :) > > Not supporting sub-fsblock atomic writes means that we'll never hit the > directio COW fallback code, which uses the pagecache. > > Not supporting multi-fsblock atomic writes means that you don't have to > figure out how to ensure that we always do cow on forcealign > granularity. Though as I pointed out elsewhere in this thread, that's a > forcealign problem. It does simplify things a lot, and is probably a good idea for the initial version. But I suspect support for atomic writes smaller than the block size will be really useful and we should eventually support them, but maybe now on reflinked files or alwayscow.
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 1c62ee294a5a..1ea73402d592 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -332,6 +332,23 @@ static inline bool xfs_inode_has_atomicwrites(struct xfs_inode *ip) return ip->i_diflags2 & XFS_DIFLAG2_ATOMICWRITES; } +static inline bool +xfs_inode_can_atomicwrite( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_buftarg *target = xfs_inode_buftarg(ip); + + if (!xfs_inode_has_atomicwrites(ip)) + return false; + if (mp->m_sb.sb_blocksize < target->bt_bdev_awu_min) + return false; + if (mp->m_sb.sb_blocksize > target->bt_bdev_awu_max) + return false; + + return true; +} + /* * In-core inode flags. */ diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ee79cf161312..915d057db9bb 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -570,6 +570,23 @@ xfs_stat_blksize( return max_t(uint32_t, PAGE_SIZE, mp->m_sb.sb_blocksize); } +static void +xfs_get_atomic_write_attr( + struct xfs_inode *ip, + unsigned int *unit_min, + unsigned int *unit_max) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_sb *sbp = &mp->m_sb; + + if (!xfs_inode_can_atomicwrite(ip)) { + *unit_min = *unit_max = 0; + return; + } + + *unit_min = *unit_max = sbp->sb_blocksize; +} + STATIC int xfs_vn_getattr( struct mnt_idmap *idmap, @@ -643,6 +660,13 @@ xfs_vn_getattr( stat->dio_mem_align = bdev_dma_alignment(bdev) + 1; stat->dio_offset_align = bdev_logical_block_size(bdev); } + if (request_mask & STATX_WRITE_ATOMIC) { + unsigned int unit_min, unit_max; + + xfs_get_atomic_write_attr(ip, &unit_min, &unit_max); + generic_fill_statx_atomic_writes(stat, + unit_min, unit_max); + } fallthrough; default: stat->blksize = xfs_stat_blksize(ip);
Support providing info on atomic write unit min and max for an inode. For simplicity, currently we limit the min at the FS block size. As for max, we limit also at FS block size, as there is no current method to guarantee extent alignment or granularity for regular files. Signed-off-by: John Garry <john.g.garry@oracle.com> --- fs/xfs/xfs_inode.h | 17 +++++++++++++++++ fs/xfs/xfs_iops.c | 24 ++++++++++++++++++++++++ 2 files changed, 41 insertions(+)