@@ -103,7 +103,12 @@ typedef struct xfs_sb {
xfs_ino_t sb_rootino; /* root inode number */
xfs_ino_t sb_rbmino; /* bitmap inode for realtime extents */
xfs_ino_t sb_rsumino; /* summary inode for rt bitmap */
- xfs_agblock_t sb_rextsize; /* realtime extent size, blocks */
+ /*
+ * Realtime extent size, blocks. If the FORCEALIGN feature is set,
+ * the allocation group size must be a multiple of this value, and
+ * file data allocations will be aligned to this value.
+ */
+ xfs_agblock_t sb_rextsize;
xfs_agblock_t sb_agblocks; /* size of an allocation group */
xfs_agnumber_t sb_agcount; /* number of allocation groups */
xfs_extlen_t sb_rbmblocks; /* number of rt bitmap blocks */
@@ -387,6 +392,8 @@ xfs_sb_has_compat_feature(
#define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */
#define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */
#define XFS_SB_FEAT_RO_COMPAT_INOBTCNT (1 << 3) /* inobt block counts */
+/* all AGs and data allocations must be aligned to rextsize, even for !rt files */
+#define XFS_SB_FEAT_RO_COMPAT_FORCEALIGN (1 << 30)
#define XFS_SB_FEAT_RO_COMPAT_ALL \
(XFS_SB_FEAT_RO_COMPAT_FINOBT | \
XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
@@ -1206,6 +1213,8 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
#define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */
#define XFS_DIFLAG2_BIGTIME_BIT 3 /* big timestamps */
#define XFS_DIFLAG2_NREXT64_BIT 4 /* large extent counters */
+/* data extent mappings for regular files must be aligned to sb_rextsize */
+#define XFS_DIFLAG2_FORCEALIGN_BIT 5
#define XFS_DIFLAG2_METADIR_BIT 63 /* filesystem metadata */
#define XFS_DIFLAG2_DAX (1ULL << XFS_DIFLAG2_DAX_BIT)
@@ -1239,9 +1248,12 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
*/
#define XFS_DIFLAG2_METADIR (1ULL << XFS_DIFLAG2_METADIR_BIT)
+#define XFS_DIFLAG2_FORCEALIGN (1ULL << XFS_DIFLAG2_FORCEALIGN_BIT)
+
#define XFS_DIFLAG2_ANY \
(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
- XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_METADIR)
+ XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_METADIR | \
+ XFS_DIFLAG2_FORCEALIGN)
static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip)
{
@@ -726,6 +726,14 @@ xfs_dinode_verify(
} else if (nextents + naextents == 0 && nblocks != 0)
return __this_address;
+ if (flags2 & XFS_DIFLAG2_FORCEALIGN) {
+ fa = xfs_inode_validate_forcealign(mp, mode, flags,
+ be32_to_cpu(dip->di_extsize),
+ be32_to_cpu(dip->di_cowextsize));
+ if (fa)
+ return fa;
+ }
+
return NULL;
}
@@ -900,3 +908,31 @@ xfs_inode_validate_cowextsize(
return NULL;
}
+
+/* Validate the forcealign inode flag: NULL if ok, else address of failed check. */
+xfs_failaddr_t
+xfs_inode_validate_forcealign(
+	struct xfs_mount	*mp,
+	uint16_t		mode,
+	uint16_t		flags,
+	uint32_t		extsize,
+	uint32_t		cowextsize)
+{
+	/* superblock rocompat feature flag required */
+	if (!xfs_has_forcealign(mp))
+		return __this_address;
+	/* Only regular files and directories */
+	if (!S_ISDIR(mode) && !S_ISREG(mode))
+		return __this_address;
+	/* Doesn't apply to realtime files */
+	if (flags & XFS_DIFLAG_REALTIME)
+		return __this_address;
+	/* Requires no extent size hint */
+	if (extsize != 0)
+		return __this_address;
+	/* Requires no cow extent size hint */
+	if (cowextsize != 0)
+		return __this_address;
+
+	return NULL;
+}
@@ -36,6 +36,9 @@ xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp,
xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp,
uint32_t cowextsize, uint16_t mode, uint16_t flags,
uint64_t flags2);
+xfs_failaddr_t xfs_inode_validate_forcealign(struct xfs_mount *mp,
+ uint16_t mode, uint16_t flags, uint32_t extsize,
+ uint32_t cowextsize);
static inline uint64_t xfs_inode_encode_bigtime(struct timespec64 tv)
{
@@ -81,6 +81,8 @@ xfs_flags2diflags2(
di_flags2 |= XFS_DIFLAG2_DAX;
if (xflags & FS_XFLAG_COWEXTSIZE)
di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
+ if (xflags & FS_XFLAG_FORCEALIGN)
+ di_flags2 |= XFS_DIFLAG2_FORCEALIGN;
return di_flags2;
}
@@ -127,6 +129,8 @@ xfs_ip2xflags(
flags |= FS_XFLAG_DAX;
if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
flags |= FS_XFLAG_COWEXTSIZE;
+ if (ip->i_diflags2 & XFS_DIFLAG2_FORCEALIGN)
+ flags |= FS_XFLAG_FORCEALIGN;
}
if (xfs_inode_has_attr_fork(ip))
@@ -228,6 +232,8 @@ xfs_inode_inherit_flags2(
ip->i_diflags2 |= XFS_DIFLAG2_DAX;
if (pip->i_diflags2 & XFS_DIFLAG2_METADIR)
ip->i_diflags2 |= XFS_DIFLAG2_METADIR;
+ if (pip->i_diflags2 & XFS_DIFLAG2_FORCEALIGN)
+ ip->i_diflags2 |= XFS_DIFLAG2_FORCEALIGN;
/* Don't let invalid cowextsize hints propagate. */
failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
@@ -236,6 +242,14 @@ xfs_inode_inherit_flags2(
ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
ip->i_cowextsize = 0;
}
+
+ if (ip->i_diflags2 & XFS_DIFLAG2_FORCEALIGN) {
+ failaddr = xfs_inode_validate_forcealign(ip->i_mount,
+ VFS_I(ip)->i_mode, ip->i_diflags, ip->i_extsize,
+ ip->i_cowextsize);
+ if (failaddr)
+ ip->i_diflags2 &= ~XFS_DIFLAG2_FORCEALIGN;
+ }
}
/* Initialise an inode's attributes. */
@@ -167,6 +167,9 @@ xfs_sb_version_to_features(
features |= XFS_FEAT_REFLINK;
if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT)
features |= XFS_FEAT_INOBTCNT;
+ if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FORCEALIGN)
+ features |= XFS_FEAT_FORCEALIGN;
+
if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_FTYPE)
features |= XFS_FEAT_FTYPE;
if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES)
@@ -370,6 +373,27 @@ xfs_validate_sb_rtgroups(
return 0;
}
+/* Check that sb_rextsize is a usable forcealign unit: 0 if so, else -EINVAL. */
+static int
+xfs_validate_sb_forcealign(
+	struct xfs_mount	*mp,
+	struct xfs_sb		*sbp)
+{
+	xfs_agblock_t		align = sbp->sb_rextsize;
+
+	/* Reject zero first; it also keeps the modulo below well defined. */
+	if (!align) {
+		xfs_warn(mp, "Cannot have forced allocation alignment of zero.");
+		return -EINVAL;
+	}
+	if (sbp->sb_agblocks % align == 0)
+		return 0;
+	/* The AG size is not a whole multiple of the alignment unit. */
+	xfs_warn(mp, "Allocation group size %u not aligned to forcealign %u.",
+			sbp->sb_agblocks, align);
+	return -EINVAL;
+}
+
/* Check the validity of the SB. */
STATIC int
xfs_validate_sb_common(
@@ -437,6 +461,12 @@ xfs_validate_sb_common(
if (error)
return error;
}
+
+ if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FORCEALIGN) {
+ error = xfs_validate_sb_forcealign(mp, sbp);
+ if (error)
+ return error;
+ }
} else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) {
xfs_notice(mp,
@@ -666,6 +666,46 @@ xrep_dinode_extsize_hints(
}
}
+/*
+ * Clear the forcealign flag from an ondisk inode that cannot legitimately
+ * carry it.  Pre-v3 inodes and inodes without the flag are left untouched.
+ */
+STATIC void
+xrep_dinode_forcealign(
+	struct xfs_scrub	*sc,
+	struct xfs_dinode	*dip)
+{
+	uint64_t		diflags2;
+	uint16_t		diflags;
+	uint16_t		imode;
+
+	trace_xrep_dinode_forcealign(sc, dip);
+
+	if (dip->di_version < 3)
+		return;
+
+	diflags2 = be64_to_cpu(dip->di_flags2);
+	if (!(diflags2 & XFS_DIFLAG2_FORCEALIGN))
+		return;
+
+	imode = be16_to_cpu(dip->di_mode);
+	diflags = be16_to_cpu(dip->di_flags);
+
+	/*
+	 * The flag survives only if the filesystem has the feature, the inode
+	 * is a regular file or directory that is not realtime, and neither
+	 * extent size hint is set.  Zero is the same in either byte order, so
+	 * the ondisk hint fields are tested without conversion.
+	 */
+	if (!xfs_has_forcealign(sc->mp) ||
+	    (!S_ISDIR(imode) && !S_ISREG(imode)) ||
+	    (diflags & XFS_DIFLAG_REALTIME) ||
+	    dip->di_extsize != 0 || dip->di_cowextsize != 0)
+		diflags2 &= ~XFS_DIFLAG2_FORCEALIGN;
+
+	dip->di_flags2 = cpu_to_be64(diflags2);
+}
+
/* Count extents and blocks for an inode given an rmap. */
STATIC int
xrep_dinode_walk_rmap(
@@ -1506,6 +1546,7 @@ xrep_dinode_core(
xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
xrep_dinode_size(ri, dip);
xrep_dinode_extsize_hints(sc, dip);
+ xrep_dinode_forcealign(sc, dip);
xrep_dinode_zap_forks(ri, dip);
write:
@@ -2559,6 +2559,7 @@ DEFINE_REPAIR_DINODE_EVENT(xrep_dinode_zap_forks);
DEFINE_REPAIR_DINODE_EVENT(xrep_dinode_zap_dfork);
DEFINE_REPAIR_DINODE_EVENT(xrep_dinode_zap_afork);
DEFINE_REPAIR_DINODE_EVENT(xrep_dinode_ensure_forkoff);
+DEFINE_REPAIR_DINODE_EVENT(xrep_dinode_forcealign);
DECLARE_EVENT_CLASS(xrep_inode_class,
TP_PROTO(struct xfs_scrub *sc),
@@ -326,6 +326,11 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
return ip->i_diflags2 & XFS_DIFLAG2_NREXT64;
}
+static inline bool xfs_inode_force_align(struct xfs_inode *ip)
+{
+	return (ip->i_diflags2 & XFS_DIFLAG2_FORCEALIGN) != 0; /* flag set? */
+}
+
static inline bool xfs_inode_has_bigallocunit(struct xfs_inode *ip)
{
return XFS_IS_REALTIME_INODE(ip) && ip->i_mount->m_sb.sb_rextsize > 1;
@@ -1144,6 +1144,20 @@ xfs_ioctl_setattr_xflags(
if (i_flags2 && !xfs_has_v3inodes(mp))
return -EINVAL;
+ /*
+ * Force-align requires a zero extent size hint and a zero cow extent
+ * size hint.
+ */
+ if (fa->fsx_xflags & FS_XFLAG_FORCEALIGN) {
+ if (!xfs_has_forcealign(mp))
+ return -EINVAL;
+ if (fa->fsx_xflags & FS_XFLAG_COWEXTSIZE)
+ return -EINVAL;
+ if (fa->fsx_xflags & (FS_XFLAG_EXTSIZE |
+ FS_XFLAG_EXTSZINHERIT))
+ return -EINVAL;
+ }
+
ip->i_diflags = xfs_flags2diflags(ip, fa->fsx_xflags);
ip->i_diflags2 = i_flags2;
@@ -310,6 +310,7 @@ typedef struct xfs_mount {
#define XFS_FEAT_NREXT64 (1ULL << 26) /* large extent counters */
#define XFS_FEAT_METADIR (1ULL << 27) /* metadata directory tree */
#define XFS_FEAT_RTGROUPS (1ULL << 28) /* realtime groups */
+#define XFS_FEAT_FORCEALIGN (1ULL << 29) /* aligned file data extents */
/* Mount features */
#define XFS_FEAT_NOATTR2 (1ULL << 48) /* disable attr2 creation */
@@ -375,6 +376,7 @@ __XFS_HAS_FEAT(needsrepair, NEEDSREPAIR)
__XFS_HAS_FEAT(large_extent_counts, NREXT64)
__XFS_HAS_FEAT(metadir, METADIR)
__XFS_HAS_FEAT(rtgroups, RTGROUPS)
+__XFS_HAS_FEAT(forcealign, FORCEALIGN)
static inline bool xfs_has_rtrmapbt(struct xfs_mount *mp)
{
@@ -1238,6 +1238,10 @@ xfs_growfs_rt(
if (sbp->sb_rblocks > 0 && in->extsize != sbp->sb_rextsize)
return -EINVAL;
+ /* Cannot change rt extent size when forcealign is set. */
+ if (xfs_has_forcealign(mp) && in->extsize != sbp->sb_rextsize)
+ return -EINVAL;
+
/* Range check the extent size. */
if (XFS_FSB_TO_B(mp, in->extsize) > XFS_MAX_RTEXTSIZE ||
XFS_FSB_TO_B(mp, in->extsize) < XFS_MIN_RTEXTSIZE)
@@ -1727,6 +1727,10 @@ xfs_fs_fill_super(
xfs_warn(mp,
"EXPERIMENTAL realtime allocation group feature in use. Use at your own risk!");
+ if (xfs_has_forcealign(mp))
+ xfs_warn(mp,
+"EXPERIMENTAL forced data extent alignment feature in use. Use at your own risk!");
+
if (xfs_has_reflink(mp)) {
/*
* Reflink doesn't support pagecache pages that span multiple
@@ -140,6 +140,8 @@ struct fsxattr {
#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
#define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */
#define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */
+/* data extent mappings for regular files must be aligned to the rt extent size */
+#define FS_XFLAG_FORCEALIGN 0x00020000
#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
/* the read-only stuff doesn't really belong here, but any other place is