diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -354,12 +354,16 @@ xfs_sb_has_compat_feature(
#define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */
#define XFS_SB_FEAT_RO_COMPAT_INOBTCNT (1 << 3) /* inobt block counts */
#define XFS_SB_FEAT_RO_COMPAT_FORCEALIGN (1 << 30) /* aligned file data extents */
+#define XFS_SB_FEAT_RO_COMPAT_ATOMICWRITES (1 << 31) /* atomicwrites enabled */
+
#define XFS_SB_FEAT_RO_COMPAT_ALL \
(XFS_SB_FEAT_RO_COMPAT_FINOBT | \
XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
XFS_SB_FEAT_RO_COMPAT_REFLINK| \
XFS_SB_FEAT_RO_COMPAT_INOBTCNT | \
- XFS_SB_FEAT_RO_COMPAT_FORCEALIGN)
+ XFS_SB_FEAT_RO_COMPAT_FORCEALIGN | \
+ XFS_SB_FEAT_RO_COMPAT_ATOMICWRITES)
+
#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
static inline bool
xfs_sb_has_ro_compat_feature(
@@ -1088,6 +1092,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
#define XFS_DIFLAG2_NREXT64_BIT 4 /* large extent counters */
/* data extent mappings for regular files must be aligned to extent size hint */
#define XFS_DIFLAG2_FORCEALIGN_BIT 5
+#define XFS_DIFLAG2_ATOMICWRITES_BIT 6
#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT)
#define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT)
@@ -1095,10 +1100,12 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
#define XFS_DIFLAG2_BIGTIME (1 << XFS_DIFLAG2_BIGTIME_BIT)
#define XFS_DIFLAG2_NREXT64 (1 << XFS_DIFLAG2_NREXT64_BIT)
#define XFS_DIFLAG2_FORCEALIGN (1 << XFS_DIFLAG2_FORCEALIGN_BIT)
+#define XFS_DIFLAG2_ATOMICWRITES (1 << XFS_DIFLAG2_ATOMICWRITES_BIT)
#define XFS_DIFLAG2_ANY \
(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
- XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_FORCEALIGN)
+ XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_FORCEALIGN | \
+ XFS_DIFLAG2_ATOMICWRITES)
static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip)
{
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -178,7 +178,10 @@ xfs_inode_from_disk(
struct xfs_inode *ip,
struct xfs_dinode *from)
{
+ struct xfs_buftarg *target = xfs_inode_buftarg(ip);
struct inode *inode = VFS_I(ip);
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_sb *sbp = &mp->m_sb;
int error;
xfs_failaddr_t fa;
@@ -261,6 +264,13 @@ xfs_inode_from_disk(
}
if (xfs_is_reflink_inode(ip))
xfs_ifork_init_cow(ip);
+
+ if (xfs_inode_has_atomicwrites(ip)) {
+ if (sbp->sb_blocksize < target->bt_bdev_awu_min ||
+ sbp->sb_blocksize * ip->i_extsize > target->bt_bdev_awu_max)
+ ip->i_diflags2 &= ~XFS_DIFLAG2_ATOMICWRITES;
+ }
+
return 0;
out_destroy_data_fork:
@@ -460,6 +470,25 @@ xfs_dinode_verify_nrext64(
return NULL;
}
+static xfs_failaddr_t
+xfs_inode_validate_atomicwrites(
+ struct xfs_mount *mp,
+ bool forcealign)
+{
+ /* superblock rocompat feature flag */
+ if (!xfs_has_atomicwrites(mp))
+ return __this_address;
+
+ /*
+ * forcealign is required, so rely on sanity checks in
+ * xfs_inode_validate_forcealign()
+ */
+ if (!forcealign)
+ return __this_address;
+
+ return NULL;
+}
+
xfs_failaddr_t
xfs_dinode_verify(
struct xfs_mount *mp,
@@ -624,6 +653,13 @@ xfs_dinode_verify(
return fa;
}
+ if (flags2 & XFS_DIFLAG2_ATOMICWRITES) {
+ fa = xfs_inode_validate_atomicwrites(mp,
+ flags2 & XFS_DIFLAG2_FORCEALIGN);
+ if (fa)
+ return fa;
+ }
+
return NULL;
}
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -165,6 +165,8 @@ xfs_sb_version_to_features(
features |= XFS_FEAT_INOBTCNT;
if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FORCEALIGN)
features |= XFS_FEAT_FORCEALIGN;
+ if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_ATOMICWRITES)
+ features |= XFS_FEAT_ATOMICWRITES;
if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_FTYPE)
features |= XFS_FEAT_FTYPE;
if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -2057,6 +2057,8 @@ int
xfs_init_buftarg(
struct xfs_buftarg *btp,
size_t logical_sectorsize,
+ unsigned int awu_min,
+ unsigned int awu_max,
const char *descr)
{
/* Set up device logical sector size mask */
@@ -2083,6 +2085,9 @@ xfs_init_buftarg(
btp->bt_shrinker->scan_objects = xfs_buftarg_shrink_scan;
btp->bt_shrinker->private_data = btp;
shrinker_register(btp->bt_shrinker);
+
+ btp->bt_bdev_awu_min = awu_min;
+ btp->bt_bdev_awu_max = awu_max;
return 0;
out_destroy_io_count:
@@ -2099,6 +2104,7 @@ xfs_alloc_buftarg(
{
struct xfs_buftarg *btp;
const struct dax_holder_operations *ops = NULL;
+ unsigned int awu_min = 0, awu_max = 0;
#if defined(CONFIG_FS_DAX) && defined(CONFIG_MEMORY_FAILURE)
ops = &xfs_dax_holder_operations;
@@ -2112,6 +2118,13 @@ xfs_alloc_buftarg(
btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off,
mp, ops);
+ if (bdev_can_atomic_write(btp->bt_bdev)) {
+ struct request_queue *q = bdev_get_queue(btp->bt_bdev);
+
+ awu_min = queue_atomic_write_unit_min_bytes(q);
+ awu_max = queue_atomic_write_unit_max_bytes(q);
+ }
+
/*
* When allocating the buftargs we have not yet read the super block and
* thus don't know the file system sector size yet.
@@ -2119,7 +2132,7 @@ xfs_alloc_buftarg(
if (xfs_setsize_buftarg(btp, bdev_logical_block_size(btp->bt_bdev)))
goto error_free;
if (xfs_init_buftarg(btp, bdev_logical_block_size(btp->bt_bdev),
- mp->m_super->s_id))
+ awu_min, awu_max, mp->m_super->s_id))
goto error_free;
return btp;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -124,6 +124,8 @@ struct xfs_buftarg {
struct percpu_counter bt_io_count;
struct ratelimit_state bt_ioerror_rl;
+ unsigned int bt_bdev_awu_min, bt_bdev_awu_max;
+
/* built-in cache, if we're not using the perag one */
struct xfs_buf_cache bt_cache[];
};
@@ -393,7 +395,7 @@ bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic);
/* for xfs_buf_mem.c only: */
int xfs_init_buftarg(struct xfs_buftarg *btp, size_t logical_sectorsize,
- const char *descr);
+ unsigned int awu_min, unsigned int awu_max, const char *descr);
void xfs_destroy_buftarg(struct xfs_buftarg *btp);
#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -609,6 +609,8 @@ xfs_ip2xflags(
flags |= FS_XFLAG_COWEXTSIZE;
if (ip->i_diflags2 & XFS_DIFLAG2_FORCEALIGN)
flags |= FS_XFLAG_FORCEALIGN;
+ if (ip->i_diflags2 & XFS_DIFLAG2_ATOMICWRITES)
+ flags |= FS_XFLAG_ATOMICWRITES;
}
if (xfs_inode_has_attr_fork(ip))
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -321,6 +321,11 @@ static inline bool xfs_inode_has_extsize(struct xfs_inode *ip)
return ip->i_diflags & XFS_DIFLAG_EXTSIZE;
}
+static inline bool xfs_inode_has_atomicwrites(struct xfs_inode *ip)
+{
+ return ip->i_diflags2 & XFS_DIFLAG2_ATOMICWRITES;
+}
+
/*
* Return the buftarg used for data allocations on a given inode.
*/
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1112,6 +1112,8 @@ xfs_flags2diflags2(
di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
if (xflags & FS_XFLAG_FORCEALIGN)
di_flags2 |= XFS_DIFLAG2_FORCEALIGN;
+ if (xflags & FS_XFLAG_ATOMICWRITES)
+ di_flags2 |= XFS_DIFLAG2_ATOMICWRITES;
return di_flags2;
}
@@ -1122,12 +1124,16 @@ xfs_ioctl_setattr_xflags(
struct xfs_inode *ip,
struct fileattr *fa)
{
+ struct xfs_buftarg *target = xfs_inode_buftarg(ip);
struct xfs_mount *mp = ip->i_mount;
+ struct xfs_sb *sbp = &mp->m_sb;
bool rtflag = (fa->fsx_xflags & FS_XFLAG_REALTIME);
+ bool atomic_writes = fa->fsx_xflags & FS_XFLAG_ATOMICWRITES;
uint64_t i_flags2;
- if (rtflag != XFS_IS_REALTIME_INODE(ip)) {
- /* Can't change realtime flag if any extents are allocated. */
+ /* Can't change RT or atomic flags if any extents are allocated. */
+ if (rtflag != XFS_IS_REALTIME_INODE(ip) ||
+ atomic_writes != xfs_inode_has_atomicwrites(ip)) {
if (ip->i_df.if_nextents || ip->i_delayed_blks)
return -EINVAL;
}
@@ -1164,6 +1170,17 @@ xfs_ioctl_setattr_xflags(
return -EINVAL;
}
+ if (atomic_writes) {
+ if (!xfs_has_atomicwrites(mp))
+ return -EINVAL;
+ if (target->bt_bdev_awu_min > sbp->sb_blocksize)
+ return -EINVAL;
+ if (target->bt_bdev_awu_max < fa->fsx_extsize)
+ return -EINVAL;
+ if (!(fa->fsx_xflags & FS_XFLAG_FORCEALIGN))
+ return -EINVAL;
+ }
+
ip->i_diflags = xfs_flags2diflags(ip, fa->fsx_xflags);
ip->i_diflags2 = i_flags2;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -293,6 +293,7 @@ typedef struct xfs_mount {
#define XFS_FEAT_NEEDSREPAIR (1ULL << 25) /* needs xfs_repair */
#define XFS_FEAT_NREXT64 (1ULL << 26) /* large extent counters */
#define XFS_FEAT_FORCEALIGN (1ULL << 27) /* aligned file data extents */
+#define XFS_FEAT_ATOMICWRITES (1ULL << 28) /* atomic writes support */
/* Mount features */
#define XFS_FEAT_NOATTR2 (1ULL << 48) /* disable attr2 creation */
@@ -357,6 +358,7 @@ __XFS_HAS_FEAT(bigtime, BIGTIME)
__XFS_HAS_FEAT(needsrepair, NEEDSREPAIR)
__XFS_HAS_FEAT(large_extent_counts, NREXT64)
__XFS_HAS_FEAT(forcealign, FORCEALIGN)
+__XFS_HAS_FEAT(atomicwrites, ATOMICWRITES)
/*
* Mount features
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1710,6 +1710,10 @@ xfs_fs_fill_super(
xfs_warn(mp,
"EXPERIMENTAL forced data extent alignment feature in use. Use at your own risk!");
+ if (xfs_has_atomicwrites(mp))
+ xfs_warn(mp,
+"EXPERIMENTAL atomicwrites feature in use. Use at your own risk!");
+
if (xfs_has_reflink(mp)) {
if (mp->m_sb.sb_rblocks) {
xfs_alert(mp,

Add initial support for FS_XFLAG_ATOMICWRITES for forcealign-enabled
inodes.

Current kernel support for atomic writes is based on HW support for
atomic writes. As such, it is required to ensure extent alignment with
atomic_write_unit_max so that an atomic write can result in a single
HW-compliant IO operation.

rtvol also guarantees extent alignment, but we are basing support
initially on forcealign, which is not yet supported for rtvol.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 fs/xfs/libxfs/xfs_format.h    | 11 +++++++++--
 fs/xfs/libxfs/xfs_inode_buf.c | 36 ++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_sb.c        |  2 ++
 fs/xfs/xfs_buf.c              | 15 ++++++++++++++-
 fs/xfs/xfs_buf.h              |  4 +++-
 fs/xfs/xfs_inode.c            |  2 ++
 fs/xfs/xfs_inode.h            |  5 +++++
 fs/xfs/xfs_ioctl.c            | 21 ++++++++++++++++++--
 fs/xfs/xfs_mount.h            |  2 ++
 fs/xfs/xfs_super.c            |  4 ++++
 10 files changed, 96 insertions(+), 6 deletions(-)
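
As a usage illustration (not part of this patch): a minimal userspace
sketch of how a file might be flagged for atomic writes through the
fsxattr interface that xfs_ioctl_setattr_xflags() validates above. The
FS_XFLAG_FORCEALIGN/FS_XFLAG_ATOMICWRITES values below are placeholders
for the uapi definitions added elsewhere in this series, and the 16KiB
extent size hint is an arbitrary choice that must not exceed the
device's atomic_write_unit_max (the fs block size must also be at least
atomic_write_unit_min). The file must have no extents allocated yet,
per the nextents/delayed-blocks check above.

/* Hypothetical sketch only -- the new xflag values are assumed, not uapi-final. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

#ifndef FS_XFLAG_FORCEALIGN
#define FS_XFLAG_FORCEALIGN	0x00020000	/* assumed value */
#endif
#ifndef FS_XFLAG_ATOMICWRITES
#define FS_XFLAG_ATOMICWRITES	0x00040000	/* assumed value */
#endif

int main(int argc, char **argv)
{
	struct fsxattr fsx;
	int fd;

	if (argc != 2)
		return 1;

	fd = open(argv[1], O_RDWR);
	if (fd < 0 || ioctl(fd, FS_IOC_FSGETXATTR, &fsx) < 0) {
		perror("open/FS_IOC_FSGETXATTR");
		return 1;
	}

	/* Extent size hint in bytes; must fit within bt_bdev_awu_max. */
	fsx.fsx_extsize = 16 * 1024;
	fsx.fsx_xflags |= FS_XFLAG_EXTSIZE | FS_XFLAG_FORCEALIGN |
			  FS_XFLAG_ATOMICWRITES;

	if (ioctl(fd, FS_IOC_FSSETXATTR, &fsx) < 0) {
		perror("FS_IOC_FSSETXATTR");
		return 1;
	}
	return 0;
}

Once the flag is set, writes issued with RWF_ATOMIC (wired up elsewhere
in the wider series) against forcealign-aligned extents would be
expected to map to a single HW-compliant IO, which is what the
alignment checks above are meant to guarantee.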