Message ID | 20200606082745.15174-7-chandanrlinux@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | xfs: Extend per-inode extent counters. | expand |
On Sat, Jun 06, 2020 at 01:57:44PM +0530, Chandan Babu R wrote: > This commit extends the per-inode data extent counter to 47 bits. The > length of 47-bits was chosen because, > Maximum file size = 2^63. > Maximum extent count when using 64k block size = 2^63 / 2^16 = 2^47. > > The following changes are made to accomplish this, > 1. A new ro-compat superblock flag to prevent older kernels from > mounting the filesystem in read-write mode. This flag is set for the > first time when an inode would end up having more than 2^31 extents. > 3. Carve out a new 32-bit field from xfs_dinode->di_pad2[]. This field > holds the most significant 15 bits of the data extent counter. On a 1k block V5 fs, the maximum extent count is 2^(63-10) = 2^53. If you're going to allocate 32 bits of space from di_pad2 to expand the data fork's nextents, let's use the entire bitspace. > 2. A new inode->di_flags2 flag to indicate that the newly added field > contains valid data. This flag is set when one of the following two > conditions are met, > - When the inode is about to have more than 2^31 extents. > - When flushing the incore inode (See xfs_iflush_int()), if > the superblock ro-compat flag is already set. 
> > Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> > --- > fs/xfs/libxfs/xfs_bmap.c | 40 ++++++++-------- > fs/xfs/libxfs/xfs_format.h | 30 ++++++++---- > fs/xfs/libxfs/xfs_inode_buf.c | 46 +++++++++++++++--- > fs/xfs/libxfs/xfs_inode_buf.h | 2 + > fs/xfs/libxfs/xfs_inode_fork.c | 84 ++++++++++++++++++++++++++------- > fs/xfs/libxfs/xfs_inode_fork.h | 3 +- > fs/xfs/libxfs/xfs_log_format.h | 5 +- > fs/xfs/libxfs/xfs_types.h | 5 +- > fs/xfs/scrub/inode.c | 9 ++-- > fs/xfs/xfs_inode.c | 6 ++- > fs/xfs/xfs_inode_item.c | 5 +- > fs/xfs/xfs_inode_item_recover.c | 16 +++++-- > 12 files changed, 184 insertions(+), 67 deletions(-) > > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c > index f75b70ae7b1f..73e552678adc 100644 > --- a/fs/xfs/libxfs/xfs_bmap.c > +++ b/fs/xfs/libxfs/xfs_bmap.c > @@ -53,9 +53,9 @@ xfs_bmap_compute_maxlevels( > int whichfork, /* data or attr fork */ > int dir_bmbt) /* Dir or non-dir data fork */ > { > + uint64_t maxleafents; /* max leaf entries possible */ > int level; /* btree level */ > uint maxblocks; /* max blocks at this level */ > - uint maxleafents; /* max leaf entries possible */ > int maxrootrecs; /* max records in root block */ > int minleafrecs; /* min records in leaf block */ > int minnoderecs; /* min records in node block */ > @@ -477,7 +477,7 @@ xfs_bmap_check_leaf_extents( > if (bp_release) > xfs_trans_brelse(NULL, bp); > error_norelse: > - xfs_warn(mp, "%s: BAD after btree leaves for %d extents", > + xfs_warn(mp, "%s: BAD after btree leaves for %llu extents", > __func__, i); > xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__); > xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); > @@ -918,7 +918,7 @@ xfs_bmap_local_to_extents( > xfs_iext_first(ifp, &icur); > xfs_iext_insert(ip, &icur, &rec, 0); > > - error = xfs_next_set(ip, whichfork, 1); > + error = xfs_next_set(tp, ip, whichfork, 1); > if (error) > goto done; > > @@ -1610,7 +1610,7 @@ xfs_bmap_add_extent_delay_real( > xfs_iext_prev(ifp, 
&bma->icur); > xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT); > > - error = xfs_next_set(bma->ip, whichfork, -1); > + error = xfs_next_set(bma->tp, bma->ip, whichfork, -1); > if (error) > goto done; > > @@ -1717,7 +1717,7 @@ xfs_bmap_add_extent_delay_real( > PREV.br_state = new->br_state; > xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); > > - error = xfs_next_set(bma->ip, whichfork, 1); > + error = xfs_next_set(bma->tp, bma->ip, whichfork, 1); > if (error) > goto done; > > @@ -1786,7 +1786,7 @@ xfs_bmap_add_extent_delay_real( > */ > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > > - error = xfs_next_set(bma->ip, whichfork, 1); > + error = xfs_next_set(bma->tp, bma->ip, whichfork, 1); > if (error) > goto done; > > @@ -1876,7 +1876,7 @@ xfs_bmap_add_extent_delay_real( > */ > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > > - error = xfs_next_set(bma->ip, whichfork, 1); > + error = xfs_next_set(bma->tp, bma->ip, whichfork, 1); > if (error) > goto done; > > @@ -1965,7 +1965,7 @@ xfs_bmap_add_extent_delay_real( > xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state); > xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state); > > - error = xfs_next_set(bma->ip, whichfork, 1); > + error = xfs_next_set(bma->tp, bma->ip, whichfork, 1); > if (error) > goto done; > > @@ -2172,7 +2172,7 @@ xfs_bmap_add_extent_unwritten_real( > xfs_iext_prev(ifp, icur); > xfs_iext_update_extent(ip, state, icur, &LEFT); > > - error = xfs_next_set(ip, whichfork, -2); > + error = xfs_next_set(tp, ip, whichfork, -2); > if (error) > goto done; > > @@ -2228,7 +2228,7 @@ xfs_bmap_add_extent_unwritten_real( > xfs_iext_prev(ifp, icur); > xfs_iext_update_extent(ip, state, icur, &LEFT); > > - error = xfs_next_set(ip, whichfork, -1); > + error = xfs_next_set(tp, ip, whichfork, -1); > if (error) > goto done; > > @@ -2274,7 +2274,7 @@ xfs_bmap_add_extent_unwritten_real( > xfs_iext_prev(ifp, icur); > xfs_iext_update_extent(ip, state, icur, &PREV); > > - error = 
xfs_next_set(ip, whichfork, -1); > + error = xfs_next_set(tp, ip, whichfork, -1); > if (error) > goto done; > > @@ -2385,7 +2385,7 @@ xfs_bmap_add_extent_unwritten_real( > xfs_iext_update_extent(ip, state, icur, &PREV); > xfs_iext_insert(ip, icur, new, state); > > - error = xfs_next_set(ip, whichfork, 1); > + error = xfs_next_set(tp, ip, whichfork, 1); > if (error) > goto done; > > @@ -2464,7 +2464,7 @@ xfs_bmap_add_extent_unwritten_real( > xfs_iext_next(ifp, icur); > xfs_iext_insert(ip, icur, new, state); > > - error = xfs_next_set(ip, whichfork, 1); > + error = xfs_next_set(tp, ip, whichfork, 1); > if (error) > goto done; > > @@ -2519,7 +2519,7 @@ xfs_bmap_add_extent_unwritten_real( > xfs_iext_insert(ip, icur, &r[1], state); > xfs_iext_insert(ip, icur, &r[0], state); > > - error = xfs_next_set(ip, whichfork, 2); > + error = xfs_next_set(tp, ip, whichfork, 2); > if (error) > goto done; > > @@ -2838,7 +2838,7 @@ xfs_bmap_add_extent_hole_real( > xfs_iext_prev(ifp, icur); > xfs_iext_update_extent(ip, state, icur, &left); > > - error = xfs_next_set(ip, whichfork, -1); > + error = xfs_next_set(tp, ip, whichfork, -1); > if (error) > goto done; > > @@ -2940,7 +2940,7 @@ xfs_bmap_add_extent_hole_real( > */ > xfs_iext_insert(ip, icur, new, state); > > - error = xfs_next_set(ip, whichfork, 1); > + error = xfs_next_set(tp, ip, whichfork, 1); > if (error) > goto done; > > @@ -5140,7 +5140,7 @@ xfs_bmap_del_extent_real( > xfs_iext_remove(ip, icur, state); > xfs_iext_prev(ifp, icur); > > - error = xfs_next_set(ip, whichfork, -1); > + error = xfs_next_set(tp, ip, whichfork, -1); > if (error) > goto done; > > @@ -5252,7 +5252,7 @@ xfs_bmap_del_extent_real( > } else > flags |= xfs_ilog_fext(whichfork); > > - error = xfs_next_set(ip, whichfork, 1); > + error = xfs_next_set(tp, ip, whichfork, 1); > if (error) > goto done; > > @@ -5722,7 +5722,7 @@ xfs_bmse_merge( > * Update the on-disk extent count, the btree if necessary and log the > * inode. 
> */ > - error = xfs_next_set(ip, whichfork, -1); > + error = xfs_next_set(tp, ip, whichfork, -1); > if (error) > goto done; > > @@ -6113,7 +6113,7 @@ xfs_bmap_split_extent( > xfs_iext_next(ifp, &icur); > xfs_iext_insert(ip, &icur, &new, 0); > > - error = xfs_next_set(ip, whichfork, 1); > + error = xfs_next_set(tp, ip, whichfork, 1); > if (error) > goto del_cursor; > > diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h > index b42a52bfa1e9..91bee33aa988 100644 > --- a/fs/xfs/libxfs/xfs_format.h > +++ b/fs/xfs/libxfs/xfs_format.h > @@ -449,10 +449,12 @@ xfs_sb_has_compat_feature( > #define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */ > #define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */ > #define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */ > +#define XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR (1 << 3) /* 47bit data extents */ I wonder if we could come up with a better name for this... DFORK_EXTENTHI Hmm... BIG_DFORK Hmmm... ULTRAFRAG There we go. "XFS with UltraFrag, part of this complete g@m3r t00lk1t." ;) ... (What do you think of the second suggestion?) 
> #define XFS_SB_FEAT_RO_COMPAT_ALL \ > (XFS_SB_FEAT_RO_COMPAT_FINOBT | \ > XFS_SB_FEAT_RO_COMPAT_RMAPBT | \ > - XFS_SB_FEAT_RO_COMPAT_REFLINK) > + XFS_SB_FEAT_RO_COMPAT_REFLINK | \ > + XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR) > #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL > static inline bool > xfs_sb_has_ro_compat_feature( > @@ -563,6 +565,18 @@ static inline bool xfs_sb_version_hasreflink(struct xfs_sb *sbp) > (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK); > } > > +static inline bool xfs_sb_version_has47bitext(struct xfs_sb *sbp) > +{ > + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && > + (sbp->sb_features_ro_compat & > + XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR); > +} > + > +static inline void xfs_sb_version_add47bitext(struct xfs_sb *sbp) > +{ > + sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR; > +} > + > /* > * end of superblock version macros > */ > @@ -873,7 +887,7 @@ typedef struct xfs_dinode { > __be64 di_size; /* number of bytes in file */ > __be64 di_nblocks; /* # of direct & btree blocks used */ > __be32 di_extsize; /* basic/minimum extent size for file */ > - __be32 di_nextents; /* number of extents in data fork */ > + __be32 di_nextents_lo; /* number of extents in data fork */ > __be16 di_anextents; /* number of extents in attribute fork*/ > __u8 di_forkoff; /* attr fork offs, <<3 for 64b align */ > __s8 di_aformat; /* format of attr fork's data */ > @@ -891,7 +905,8 @@ typedef struct xfs_dinode { > __be64 di_lsn; /* flush sequence */ > __be64 di_flags2; /* more random flags */ > __be32 di_cowextsize; /* basic cow extent size for file */ > - __u8 di_pad2[12]; /* more padding for future expansion */ > + __be32 di_nextents_hi; > + __u8 di_pad2[8]; /* more padding for future expansion */ > > /* fields only written to during inode creation */ > xfs_timestamp_t di_crtime; /* time created */ > @@ -992,10 +1007,6 @@ enum xfs_dinode_fmt { > ((w) == XFS_DATA_FORK ? 
\ > (dip)->di_format : \ > (dip)->di_aformat) > -#define XFS_DFORK_NEXTENTS(dip,w) \ > - ((w) == XFS_DATA_FORK ? \ > - be32_to_cpu((dip)->di_nextents) : \ > - be16_to_cpu((dip)->di_anextents)) > > /* > * For block and character special files the 32bit dev_t is stored at the > @@ -1061,12 +1072,15 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) > #define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */ > #define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */ > #define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */ > +#define XFS_DIFLAG2_47BIT_NEXTENTS_BIT 3 /* Uses di_nextents_hi field */ > #define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT) > #define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT) > #define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT) > +#define XFS_DIFLAG2_47BIT_NEXTENTS (1 << XFS_DIFLAG2_47BIT_NEXTENTS_BIT) > > #define XFS_DIFLAG2_ANY \ > - (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE) > + (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \ > + XFS_DIFLAG2_47BIT_NEXTENTS) > > /* > * Inode number format: > diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c > index 6f84ea85fdd8..8b89fe080f70 100644 > --- a/fs/xfs/libxfs/xfs_inode_buf.c > +++ b/fs/xfs/libxfs/xfs_inode_buf.c > @@ -307,7 +307,8 @@ xfs_inode_to_disk( > to->di_size = cpu_to_be64(from->di_size); > to->di_nblocks = cpu_to_be64(from->di_nblocks); > to->di_extsize = cpu_to_be32(from->di_extsize); > - to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df)); > + to->di_nextents_lo = cpu_to_be32(xfs_ifork_nextents(&ip->i_df) & > + 0xffffffffU); > to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp)); > to->di_forkoff = from->di_forkoff; > to->di_aformat = xfs_ifork_format(ip->i_afp); > @@ -322,6 +323,10 @@ xfs_inode_to_disk( > to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.tv_nsec); > to->di_flags2 = cpu_to_be64(from->di_flags2); > 
to->di_cowextsize = cpu_to_be32(from->di_cowextsize); > + if (from->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) > + to->di_nextents_hi > + = cpu_to_be32(xfs_ifork_nextents(&ip->i_df) > + >> 32); /me kinda hates the indentation here, would a convenience variable reduce the amount of linewrapping here? Oh, right, we're in a new epoch now; just go past 80 columns. > to->di_ino = cpu_to_be64(ip->i_ino); > to->di_lsn = cpu_to_be64(lsn); > memset(to->di_pad2, 0, sizeof(to->di_pad2)); > @@ -360,7 +365,7 @@ xfs_log_dinode_to_disk( > to->di_size = cpu_to_be64(from->di_size); > to->di_nblocks = cpu_to_be64(from->di_nblocks); > to->di_extsize = cpu_to_be32(from->di_extsize); > - to->di_nextents = cpu_to_be32(from->di_nextents); > + to->di_nextents_lo = cpu_to_be32(from->di_nextents_lo); > to->di_anextents = cpu_to_be16(from->di_anextents); > to->di_forkoff = from->di_forkoff; > to->di_aformat = from->di_aformat; > @@ -375,6 +380,9 @@ xfs_log_dinode_to_disk( > to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); > to->di_flags2 = cpu_to_be64(from->di_flags2); > to->di_cowextsize = cpu_to_be32(from->di_cowextsize); > + if (from->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) > + to->di_nextents_hi = > + cpu_to_be32(from->di_nextents_hi); > to->di_ino = cpu_to_be64(from->di_ino); > to->di_lsn = cpu_to_be64(from->di_lsn); > memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); > @@ -391,7 +399,9 @@ xfs_dinode_verify_fork( > struct xfs_mount *mp, > int whichfork) > { > - uint32_t di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork); > + xfs_extnum_t di_nextents; > + > + di_nextents = xfs_dfork_nextents(&mp->m_sb, dip, whichfork); > > switch (XFS_DFORK_FORMAT(dip, whichfork)) { > case XFS_DINODE_FMT_LOCAL: > @@ -462,6 +472,8 @@ xfs_dinode_verify( > uint16_t flags; > uint64_t flags2; > uint64_t di_size; > + xfs_extnum_t nextents; > + int64_t nblocks; > > if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) > return __this_address; > @@ -492,10 +504,12 @@ xfs_dinode_verify( > if 
((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) > return __this_address; > > + nextents = xfs_dfork_nextents(&mp->m_sb, dip, XFS_DATA_FORK); > + nextents += xfs_dfork_nextents(&mp->m_sb, dip, XFS_ATTR_FORK); > + nblocks = be64_to_cpu(dip->di_nblocks); > + > /* Fork checks carried over from xfs_iformat_fork */ > - if (mode && > - be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) > > - be64_to_cpu(dip->di_nblocks)) > + if (mode && nextents > nblocks) > return __this_address; > > if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) > @@ -716,3 +730,23 @@ xfs_inode_validate_cowextsize( > > return NULL; > } > + > +xfs_extnum_t > +xfs_dfork_nextents( > + struct xfs_sb *sbp, > + struct xfs_dinode *dip, > + int whichfork) > +{ > + xfs_extnum_t nextents; > + > + if (whichfork == XFS_DATA_FORK) { > + nextents = be32_to_cpu(dip->di_nextents_lo); > + if (xfs_sb_version_has_v3inode(sbp) > + && (dip->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS)) Please don't align the second line of the if test with the if body. Or maybe just create a "xfs_inode_has_big_dfork" helper to encapsulate this, like we do for reflink/hascow/realtime inodes. > + nextents |= (u64)(be32_to_cpu(dip->di_nextents_hi)) > + << 32; > + return nextents; > + } else { > + return be16_to_cpu(dip->di_anextents); I suspect you could reduce the indenting here by inverting the logic, e.g. 
if (attr fork) return be16_to_cpu(anextents); nextents = be32_to_cpu(nextents_lo); if (xfs_inode_has_big_dfork()) nextents |= (u64)be32_to_cpu(nextents_hi) << 32; return nextents; > + } > +} > diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h > index 865ac493c72a..4583db53b933 100644 > --- a/fs/xfs/libxfs/xfs_inode_buf.h > +++ b/fs/xfs/libxfs/xfs_inode_buf.h > @@ -65,5 +65,7 @@ xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp, > xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp, > uint32_t cowextsize, uint16_t mode, uint16_t flags, > uint64_t flags2); > +xfs_extnum_t xfs_dfork_nextents(struct xfs_sb *sbp, struct xfs_dinode *dip, > + int whichfork); > > #endif /* __XFS_INODE_BUF_H__ */ > diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c > index 3bf5a2c391bd..ec682e2d5bcb 100644 > --- a/fs/xfs/libxfs/xfs_inode_fork.c > +++ b/fs/xfs/libxfs/xfs_inode_fork.c > @@ -10,6 +10,7 @@ > #include "xfs_format.h" > #include "xfs_log_format.h" > #include "xfs_trans_resv.h" > +#include "xfs_sb.h" > #include "xfs_mount.h" > #include "xfs_inode.h" > #include "xfs_trans.h" > @@ -103,21 +104,22 @@ xfs_iformat_extents( > int whichfork) > { > struct xfs_mount *mp = ip->i_mount; > + struct xfs_sb *sb = &mp->m_sb; > struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); > + xfs_extnum_t nex = xfs_dfork_nextents(sb, dip, whichfork); > int state = xfs_bmap_fork_to_state(whichfork); > - int nex = XFS_DFORK_NEXTENTS(dip, whichfork); > int size = nex * sizeof(xfs_bmbt_rec_t); > struct xfs_iext_cursor icur; > struct xfs_bmbt_rec *dp; > struct xfs_bmbt_irec new; > - int i; > + xfs_extnum_t i; > > /* > * If the number of extents is unreasonable, then something is wrong and > * we just bail out rather than crash in kmem_alloc() or memcpy() below. 
> */ > if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) { > - xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", > + xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %llu).", > (unsigned long long) ip->i_ino, nex); > xfs_inode_verifier_error(ip, -EFSCORRUPTED, > "xfs_iformat_extents(1)", dip, sizeof(*dip), > @@ -233,7 +235,11 @@ xfs_iformat_data_fork( > * depend on it. > */ > ip->i_df.if_format = dip->di_format; > - ip->i_df.if_nextents = be32_to_cpu(dip->di_nextents); > + ip->i_df.if_nextents = be32_to_cpu(dip->di_nextents_lo); > + if (ip->i_d.di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) > + ip->i_df.if_nextents |= > + ((u64)(be32_to_cpu(dip->di_nextents_hi)) << 32); > + > > switch (inode->i_mode & S_IFMT) { > case S_IFIFO: > @@ -729,31 +735,73 @@ xfs_ifork_verify_local_attr( > return 0; > } > > +static int > +xfs_next_set_data( > + struct xfs_trans *tp, > + struct xfs_inode *ip, > + struct xfs_ifork *ifp, > + int delta) > +{ > + struct xfs_mount *mp = ip->i_mount; > + xfs_extnum_t nr_exts; > + > + nr_exts = ifp->if_nextents + delta; > + > + if ((delta > 0 && nr_exts > MAXEXTNUM) > + || (delta < 0 && nr_exts > ifp->if_nextents)) > + return -EOVERFLOW; > + > + if (ifp->if_nextents <= MAXEXTNUM31BIT && > + nr_exts > MAXEXTNUM31BIT && > + !(ip->i_d.di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) && > + xfs_sb_version_has_v3inode(&mp->m_sb)) { > + if (!xfs_sb_version_has47bitext(&mp->m_sb)) { Urk. Again, don't indent the if test logic and the if body statements to the same level. > + bool log_sb = false; > + > + spin_lock(&mp->m_sb_lock); > + if (!xfs_sb_version_has47bitext(&mp->m_sb)) { > + xfs_sb_version_add47bitext(&mp->m_sb); > + log_sb = true; > + } > + spin_unlock(&mp->m_sb_lock); > + > + if (log_sb) > + xfs_log_sb(tp); > + } Hm, dynamic filesystem upgrade. This probably ought to log something to dmesg about the upgrade. It might also be better to make this a separate helper so that it's not triply-indented. 
> + > + ip->i_d.di_flags2 |= XFS_DIFLAG2_47BIT_NEXTENTS; > + } > + > + ifp->if_nextents = nr_exts; > + > + return 0; > +} > + > int > xfs_next_set( > + struct xfs_trans *tp, > struct xfs_inode *ip, > int whichfork, > int delta) > { > struct xfs_ifork *ifp; > int64_t nr_exts; > - int64_t max_exts; > + int error = 0; > > ifp = XFS_IFORK_PTR(ip, whichfork); > > - if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) > - max_exts = MAXEXTNUM; > - else if (whichfork == XFS_ATTR_FORK) > - max_exts = MAXAEXTNUM; > - else > - ASSERT(0); > - > - nr_exts = ifp->if_nextents + delta; > - if ((delta > 0 && nr_exts > max_exts) > - || (delta < 0 && nr_exts < 0)) > - return -EOVERFLOW; > + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) { > + error = xfs_next_set_data(tp, ip, ifp, delta); > + } else if (whichfork == XFS_ATTR_FORK) { > + nr_exts = ifp->if_nextents + delta; > + if ((delta > 0 && nr_exts > MAXAEXTNUM) > + || (delta < 0 && nr_exts < 0)) > + return -EOVERFLOW; > > - ifp->if_nextents = nr_exts; > + ifp->if_nextents = nr_exts; > + } else { > + ASSERT(0); > + } > > - return 0; > + return error; > } > diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h > index a84ae42ace79..c74fa6371cc8 100644 > --- a/fs/xfs/libxfs/xfs_inode_fork.h > +++ b/fs/xfs/libxfs/xfs_inode_fork.h > @@ -173,5 +173,6 @@ extern void xfs_ifork_init_cow(struct xfs_inode *ip); > int xfs_ifork_verify_local_data(struct xfs_inode *ip); > int xfs_ifork_verify_local_attr(struct xfs_inode *ip); > > -int xfs_next_set(struct xfs_inode *ip, int whichfork, int delta); > +int xfs_next_set(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, > + int delta); > #endif /* __XFS_INODE_FORK_H__ */ > diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h > index e3400c9c71cd..879aadff7692 100644 > --- a/fs/xfs/libxfs/xfs_log_format.h > +++ b/fs/xfs/libxfs/xfs_log_format.h > @@ -396,7 +396,7 @@ struct xfs_log_dinode { > xfs_fsize_t di_size; /* number 
of bytes in file */ > xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */ > xfs_extlen_t di_extsize; /* basic/minimum extent size for file */ > - xfs_extnum_t di_nextents; /* number of extents in data fork */ > + uint32_t di_nextents_lo; /* number of extents in data fork */ > xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/ > uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */ > int8_t di_aformat; /* format of attr fork's data */ > @@ -414,7 +414,8 @@ struct xfs_log_dinode { > xfs_lsn_t di_lsn; /* flush sequence */ > uint64_t di_flags2; /* more random flags */ > uint32_t di_cowextsize; /* basic cow extent size for file */ > - uint8_t di_pad2[12]; /* more padding for future expansion */ > + uint32_t di_nextents_hi; > + uint8_t di_pad2[8]; /* more padding for future expansion */ > > /* fields only written to during inode creation */ > xfs_ictimestamp_t di_crtime; /* time created */ > diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h > index 0a3041ad5bec..c68ff2178976 100644 > --- a/fs/xfs/libxfs/xfs_types.h > +++ b/fs/xfs/libxfs/xfs_types.h > @@ -12,7 +12,7 @@ typedef uint32_t xfs_agblock_t; /* blockno in alloc. group */ > typedef uint32_t xfs_agino_t; /* inode # within allocation grp */ > typedef uint32_t xfs_extlen_t; /* extent length in blocks */ > typedef uint32_t xfs_agnumber_t; /* allocation group number */ > -typedef int32_t xfs_extnum_t; /* # of extents in a file */ > +typedef uint64_t xfs_extnum_t; /* # of extents in a file */ > typedef int16_t xfs_aextnum_t; /* # extents in an attribute fork */ > typedef int64_t xfs_fsize_t; /* bytes in a file */ > typedef uint64_t xfs_ufsize_t; /* unsigned bytes in a file */ > @@ -59,7 +59,8 @@ typedef void * xfs_failaddr_t; > * Max values for extlen, extnum, aextnum. 
> */ > #define MAXEXTLEN ((xfs_extlen_t)0x001fffff) /* 21 bits */ > -#define MAXEXTNUM ((xfs_extnum_t)0x7fffffff) /* signed int */ > +#define MAXEXTNUM31BIT ((xfs_extnum_t)0x7fffffff) /* 31 bits */ > +#define MAXEXTNUM ((xfs_extnum_t)0x7fffffffffff) /* 47 bits */ > #define MAXDIREXTNUM ((xfs_extnum_t)0x7ffffff) /* 27 bits */ > #define MAXAEXTNUM ((xfs_aextnum_t)0x7fff) /* signed short */ > > diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c > index 6d483ab29e63..be41fd242ff2 100644 > --- a/fs/xfs/scrub/inode.c > +++ b/fs/xfs/scrub/inode.c > @@ -205,8 +205,8 @@ xchk_dinode( > struct xfs_mount *mp = sc->mp; > size_t fork_recs; > unsigned long long isize; > + xfs_extnum_t nextents; > uint64_t flags2; > - uint32_t nextents; > uint16_t flags; > uint16_t mode; > > @@ -354,7 +354,7 @@ xchk_dinode( > xchk_inode_extsize(sc, dip, ino, mode, flags); > > /* di_nextents */ > - nextents = be32_to_cpu(dip->di_nextents); > + nextents = xfs_dfork_nextents(&mp->m_sb, dip, XFS_DATA_FORK); > fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); > switch (dip->di_format) { > case XFS_DINODE_FMT_EXTENTS: > @@ -464,6 +464,7 @@ xchk_inode_xref_bmap( > struct xfs_scrub *sc, > struct xfs_dinode *dip) > { > + xfs_mount_t *mp = sc->mp; Please use struct xfs_mount here. The structure typedef usages are deprecated and we're trying to get rid of them (slowly). 
--D > xfs_extnum_t nextents; > xfs_filblks_t count; > xfs_filblks_t acount; > @@ -477,14 +478,14 @@ xchk_inode_xref_bmap( > &nextents, &count); > if (!xchk_should_check_xref(sc, &error, NULL)) > return; > - if (nextents < be32_to_cpu(dip->di_nextents)) > + if (nextents < xfs_dfork_nextents(&mp->m_sb, dip, XFS_DATA_FORK)) > xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); > > error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, > &nextents, &acount); > if (!xchk_should_check_xref(sc, &error, NULL)) > return; > - if (nextents != be16_to_cpu(dip->di_anextents)) > + if (nextents != xfs_dfork_nextents(&mp->m_sb, dip, XFS_ATTR_FORK)) > xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); > > /* Check nblocks against the inode. */ > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c > index 64f5f9a440ae..4418a66cf6d6 100644 > --- a/fs/xfs/xfs_inode.c > +++ b/fs/xfs/xfs_inode.c > @@ -3748,7 +3748,7 @@ xfs_iflush_int( > ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) { > xfs_alert_tag(mp, XFS_PTAG_IFLUSH, > "%s: detected corrupt incore inode %Lu, " > - "total extents = %d, nblocks = %Ld, ptr "PTR_FMT, > + "total extents = %llu, nblocks = %Ld, ptr "PTR_FMT, > __func__, ip->i_ino, > ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp), > ip->i_d.di_nblocks, ip); > @@ -3785,6 +3785,10 @@ xfs_iflush_int( > xfs_ifork_verify_local_attr(ip)) > goto flush_out; > > + if (!(ip->i_d.di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) > + && xfs_sb_version_has47bitext(&mp->m_sb)) > + ip->i_d.di_flags2 |= XFS_DIFLAG2_47BIT_NEXTENTS; > + > /* > * Copy the dirty parts of the inode into the on-disk inode. 
We always > * copy out the core of the inode, because if the inode is dirty at all > diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c > index ba47bf65b772..6f27ac7c8631 100644 > --- a/fs/xfs/xfs_inode_item.c > +++ b/fs/xfs/xfs_inode_item.c > @@ -326,7 +326,7 @@ xfs_inode_to_log_dinode( > to->di_size = from->di_size; > to->di_nblocks = from->di_nblocks; > to->di_extsize = from->di_extsize; > - to->di_nextents = xfs_ifork_nextents(&ip->i_df); > + to->di_nextents_lo = xfs_ifork_nextents(&ip->i_df) & 0xffffffffU; > to->di_anextents = xfs_ifork_nextents(ip->i_afp); > to->di_forkoff = from->di_forkoff; > to->di_aformat = xfs_ifork_format(ip->i_afp); > @@ -344,6 +344,9 @@ xfs_inode_to_log_dinode( > to->di_crtime.t_nsec = from->di_crtime.tv_nsec; > to->di_flags2 = from->di_flags2; > to->di_cowextsize = from->di_cowextsize; > + if (from->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) > + to->di_nextents_hi = > + xfs_ifork_nextents(&ip->i_df) >> 32; > to->di_ino = ip->i_ino; > to->di_lsn = lsn; > memset(to->di_pad2, 0, sizeof(to->di_pad2)); > diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c > index 10ef5ddf5429..8d64b861fb66 100644 > --- a/fs/xfs/xfs_inode_item_recover.c > +++ b/fs/xfs/xfs_inode_item_recover.c > @@ -134,6 +134,7 @@ xlog_recover_inode_commit_pass2( > struct xfs_log_dinode *ldip; > uint isize; > int need_free = 0; > + xfs_extnum_t nextents; > > if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { > in_f = item->ri_buf[0].i_addr; > @@ -255,16 +256,23 @@ xlog_recover_inode_commit_pass2( > goto out_release; > } > } > - if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){ > + > + nextents = ldip->di_nextents_lo; > + if (xfs_sb_version_has_v3inode(&mp->m_sb) && > + ldip->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) > + nextents |= ((u64)(ldip->di_nextents_hi) << 32); > + > + nextents += ldip->di_anextents; > + > + if (unlikely(nextents > ldip->di_nblocks)) { > 
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", > XFS_ERRLEVEL_LOW, mp, ldip, > sizeof(*ldip)); > xfs_alert(mp, > "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", " > - "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld", > + "dino bp "PTR_FMT", ino %Ld, total extents = %llu, nblocks = %Ld", > __func__, item, dip, bp, in_f->ilf_ino, > - ldip->di_nextents + ldip->di_anextents, > - ldip->di_nblocks); > + nextents, ldip->di_nblocks); > error = -EFSCORRUPTED; > goto out_release; > } > -- > 2.20.1 >
On Monday 8 June 2020 10:44:10 PM IST Darrick J. Wong wrote: > On Sat, Jun 06, 2020 at 01:57:44PM +0530, Chandan Babu R wrote: > > This commit extends the per-inode data extent counter to 47 bits. The > > length of 47-bits was chosen because, > > Maximum file size = 2^63. > > Maximum extent count when using 64k block size = 2^63 / 2^16 = 2^47. > > > > The following changes are made to accomplish this, > > 1. A new ro-compat superblock flag to prevent older kernels from > > mounting the filesystem in read-write mode. This flag is set for the > > first time when an inode would end up having more than 2^31 extents. > > 3. Carve out a new 32-bit field from xfs_dinode->di_pad2[]. This field > > holds the most significant 15 bits of the data extent counter. > > On a 1k block V5 fs, the maximum extent count is 2^(63-10) = 2^53. > > If you're going to allocate 32 bits of space from di_pad2 to expand the > data fork's nextents, let's use the entire bitspace. But wouldn't 2^53 extents exceed the maximum number of extents possible on a filesystem with a 64k block size? > > > 2. A new inode->di_flags2 flag to indicate that the newly added field > > contains valid data. This flag is set when one of the following two > > conditions are met, > > - When the inode is about to have more than 2^31 extents. > > - When flushing the incore inode (See xfs_iflush_int()), if > > the superblock ro-compat flag is already set. 
> > > > Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> > > --- > > fs/xfs/libxfs/xfs_bmap.c | 40 ++++++++-------- > > fs/xfs/libxfs/xfs_format.h | 30 ++++++++---- > > fs/xfs/libxfs/xfs_inode_buf.c | 46 +++++++++++++++--- > > fs/xfs/libxfs/xfs_inode_buf.h | 2 + > > fs/xfs/libxfs/xfs_inode_fork.c | 84 ++++++++++++++++++++++++++------- > > fs/xfs/libxfs/xfs_inode_fork.h | 3 +- > > fs/xfs/libxfs/xfs_log_format.h | 5 +- > > fs/xfs/libxfs/xfs_types.h | 5 +- > > fs/xfs/scrub/inode.c | 9 ++-- > > fs/xfs/xfs_inode.c | 6 ++- > > fs/xfs/xfs_inode_item.c | 5 +- > > fs/xfs/xfs_inode_item_recover.c | 16 +++++-- > > 12 files changed, 184 insertions(+), 67 deletions(-) > > > > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c > > index f75b70ae7b1f..73e552678adc 100644 > > --- a/fs/xfs/libxfs/xfs_bmap.c > > +++ b/fs/xfs/libxfs/xfs_bmap.c > > @@ -53,9 +53,9 @@ xfs_bmap_compute_maxlevels( > > int whichfork, /* data or attr fork */ > > int dir_bmbt) /* Dir or non-dir data fork */ > > { > > + uint64_t maxleafents; /* max leaf entries possible */ > > int level; /* btree level */ > > uint maxblocks; /* max blocks at this level */ > > - uint maxleafents; /* max leaf entries possible */ > > int maxrootrecs; /* max records in root block */ > > int minleafrecs; /* min records in leaf block */ > > int minnoderecs; /* min records in node block */ > > @@ -477,7 +477,7 @@ xfs_bmap_check_leaf_extents( > > if (bp_release) > > xfs_trans_brelse(NULL, bp); > > error_norelse: > > - xfs_warn(mp, "%s: BAD after btree leaves for %d extents", > > + xfs_warn(mp, "%s: BAD after btree leaves for %llu extents", > > __func__, i); > > xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__); > > xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); > > @@ -918,7 +918,7 @@ xfs_bmap_local_to_extents( > > xfs_iext_first(ifp, &icur); > > xfs_iext_insert(ip, &icur, &rec, 0); > > > > - error = xfs_next_set(ip, whichfork, 1); > > + error = xfs_next_set(tp, ip, whichfork, 1); > > if (error) > > 
goto done; > > > > @@ -1610,7 +1610,7 @@ xfs_bmap_add_extent_delay_real( > > xfs_iext_prev(ifp, &bma->icur); > > xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT); > > > > - error = xfs_next_set(bma->ip, whichfork, -1); > > + error = xfs_next_set(bma->tp, bma->ip, whichfork, -1); > > if (error) > > goto done; > > > > @@ -1717,7 +1717,7 @@ xfs_bmap_add_extent_delay_real( > > PREV.br_state = new->br_state; > > xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); > > > > - error = xfs_next_set(bma->ip, whichfork, 1); > > + error = xfs_next_set(bma->tp, bma->ip, whichfork, 1); > > if (error) > > goto done; > > > > @@ -1786,7 +1786,7 @@ xfs_bmap_add_extent_delay_real( > > */ > > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > > > > - error = xfs_next_set(bma->ip, whichfork, 1); > > + error = xfs_next_set(bma->tp, bma->ip, whichfork, 1); > > if (error) > > goto done; > > > > @@ -1876,7 +1876,7 @@ xfs_bmap_add_extent_delay_real( > > */ > > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > > > > - error = xfs_next_set(bma->ip, whichfork, 1); > > + error = xfs_next_set(bma->tp, bma->ip, whichfork, 1); > > if (error) > > goto done; > > > > @@ -1965,7 +1965,7 @@ xfs_bmap_add_extent_delay_real( > > xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state); > > xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state); > > > > - error = xfs_next_set(bma->ip, whichfork, 1); > > + error = xfs_next_set(bma->tp, bma->ip, whichfork, 1); > > if (error) > > goto done; > > > > @@ -2172,7 +2172,7 @@ xfs_bmap_add_extent_unwritten_real( > > xfs_iext_prev(ifp, icur); > > xfs_iext_update_extent(ip, state, icur, &LEFT); > > > > - error = xfs_next_set(ip, whichfork, -2); > > + error = xfs_next_set(tp, ip, whichfork, -2); > > if (error) > > goto done; > > > > @@ -2228,7 +2228,7 @@ xfs_bmap_add_extent_unwritten_real( > > xfs_iext_prev(ifp, icur); > > xfs_iext_update_extent(ip, state, icur, &LEFT); > > > > - error = xfs_next_set(ip, whichfork, -1); > > + error = 
xfs_next_set(tp, ip, whichfork, -1); > > if (error) > > goto done; > > > > @@ -2274,7 +2274,7 @@ xfs_bmap_add_extent_unwritten_real( > > xfs_iext_prev(ifp, icur); > > xfs_iext_update_extent(ip, state, icur, &PREV); > > > > - error = xfs_next_set(ip, whichfork, -1); > > + error = xfs_next_set(tp, ip, whichfork, -1); > > if (error) > > goto done; > > > > @@ -2385,7 +2385,7 @@ xfs_bmap_add_extent_unwritten_real( > > xfs_iext_update_extent(ip, state, icur, &PREV); > > xfs_iext_insert(ip, icur, new, state); > > > > - error = xfs_next_set(ip, whichfork, 1); > > + error = xfs_next_set(tp, ip, whichfork, 1); > > if (error) > > goto done; > > > > @@ -2464,7 +2464,7 @@ xfs_bmap_add_extent_unwritten_real( > > xfs_iext_next(ifp, icur); > > xfs_iext_insert(ip, icur, new, state); > > > > - error = xfs_next_set(ip, whichfork, 1); > > + error = xfs_next_set(tp, ip, whichfork, 1); > > if (error) > > goto done; > > > > @@ -2519,7 +2519,7 @@ xfs_bmap_add_extent_unwritten_real( > > xfs_iext_insert(ip, icur, &r[1], state); > > xfs_iext_insert(ip, icur, &r[0], state); > > > > - error = xfs_next_set(ip, whichfork, 2); > > + error = xfs_next_set(tp, ip, whichfork, 2); > > if (error) > > goto done; > > > > @@ -2838,7 +2838,7 @@ xfs_bmap_add_extent_hole_real( > > xfs_iext_prev(ifp, icur); > > xfs_iext_update_extent(ip, state, icur, &left); > > > > - error = xfs_next_set(ip, whichfork, -1); > > + error = xfs_next_set(tp, ip, whichfork, -1); > > if (error) > > goto done; > > > > @@ -2940,7 +2940,7 @@ xfs_bmap_add_extent_hole_real( > > */ > > xfs_iext_insert(ip, icur, new, state); > > > > - error = xfs_next_set(ip, whichfork, 1); > > + error = xfs_next_set(tp, ip, whichfork, 1); > > if (error) > > goto done; > > > > @@ -5140,7 +5140,7 @@ xfs_bmap_del_extent_real( > > xfs_iext_remove(ip, icur, state); > > xfs_iext_prev(ifp, icur); > > > > - error = xfs_next_set(ip, whichfork, -1); > > + error = xfs_next_set(tp, ip, whichfork, -1); > > if (error) > > goto done; > > > > @@ -5252,7 +5252,7 @@ 
xfs_bmap_del_extent_real( > > } else > > flags |= xfs_ilog_fext(whichfork); > > > > - error = xfs_next_set(ip, whichfork, 1); > > + error = xfs_next_set(tp, ip, whichfork, 1); > > if (error) > > goto done; > > > > @@ -5722,7 +5722,7 @@ xfs_bmse_merge( > > * Update the on-disk extent count, the btree if necessary and log the > > * inode. > > */ > > - error = xfs_next_set(ip, whichfork, -1); > > + error = xfs_next_set(tp, ip, whichfork, -1); > > if (error) > > goto done; > > > > @@ -6113,7 +6113,7 @@ xfs_bmap_split_extent( > > xfs_iext_next(ifp, &icur); > > xfs_iext_insert(ip, &icur, &new, 0); > > > > - error = xfs_next_set(ip, whichfork, 1); > > + error = xfs_next_set(tp, ip, whichfork, 1); > > if (error) > > goto del_cursor; > > > > diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h > > index b42a52bfa1e9..91bee33aa988 100644 > > --- a/fs/xfs/libxfs/xfs_format.h > > +++ b/fs/xfs/libxfs/xfs_format.h > > @@ -449,10 +449,12 @@ xfs_sb_has_compat_feature( > > #define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */ > > #define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */ > > #define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */ > > +#define XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR (1 << 3) /* 47bit data extents */ > > I wonder if we could come up with a better name for this... > > DFORK_EXTENTHI > > Hmm... > > BIG_DFORK > > Hmmm... > > ULTRAFRAG > > There we go. "XFS with UltraFrag, part of this complete g@m3r t00lk1t." ;) > > ... > > (What do you think of the second suggestion?) I like the name DFORK_EXTENTHI since it signifies that we are now using the "_HI" field of the extent counter and it can also be used to convey the same for the attr extent counter as well. Thanks for the suggestions. 
> > > #define XFS_SB_FEAT_RO_COMPAT_ALL \ > > (XFS_SB_FEAT_RO_COMPAT_FINOBT | \ > > XFS_SB_FEAT_RO_COMPAT_RMAPBT | \ > > - XFS_SB_FEAT_RO_COMPAT_REFLINK) > > + XFS_SB_FEAT_RO_COMPAT_REFLINK | \ > > + XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR) > > #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL > > static inline bool > > xfs_sb_has_ro_compat_feature( > > @@ -563,6 +565,18 @@ static inline bool xfs_sb_version_hasreflink(struct xfs_sb *sbp) > > (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK); > > } > > > > +static inline bool xfs_sb_version_has47bitext(struct xfs_sb *sbp) > > +{ > > + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && > > + (sbp->sb_features_ro_compat & > > + XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR); > > +} > > + > > +static inline void xfs_sb_version_add47bitext(struct xfs_sb *sbp) > > +{ > > + sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR; > > +} > > + > > /* > > * end of superblock version macros > > */ > > @@ -873,7 +887,7 @@ typedef struct xfs_dinode { > > __be64 di_size; /* number of bytes in file */ > > __be64 di_nblocks; /* # of direct & btree blocks used */ > > __be32 di_extsize; /* basic/minimum extent size for file */ > > - __be32 di_nextents; /* number of extents in data fork */ > > + __be32 di_nextents_lo; /* number of extents in data fork */ > > __be16 di_anextents; /* number of extents in attribute fork*/ > > __u8 di_forkoff; /* attr fork offs, <<3 for 64b align */ > > __s8 di_aformat; /* format of attr fork's data */ > > @@ -891,7 +905,8 @@ typedef struct xfs_dinode { > > __be64 di_lsn; /* flush sequence */ > > __be64 di_flags2; /* more random flags */ > > __be32 di_cowextsize; /* basic cow extent size for file */ > > - __u8 di_pad2[12]; /* more padding for future expansion */ > > + __be32 di_nextents_hi; > > + __u8 di_pad2[8]; /* more padding for future expansion */ > > > > /* fields only written to during inode creation */ > > xfs_timestamp_t di_crtime; /* time created */ > > @@ 
-992,10 +1007,6 @@ enum xfs_dinode_fmt { > > ((w) == XFS_DATA_FORK ? \ > > (dip)->di_format : \ > > (dip)->di_aformat) > > -#define XFS_DFORK_NEXTENTS(dip,w) \ > > - ((w) == XFS_DATA_FORK ? \ > > - be32_to_cpu((dip)->di_nextents) : \ > > - be16_to_cpu((dip)->di_anextents)) > > > > /* > > * For block and character special files the 32bit dev_t is stored at the > > @@ -1061,12 +1072,15 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) > > #define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */ > > #define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */ > > #define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */ > > +#define XFS_DIFLAG2_47BIT_NEXTENTS_BIT 3 /* Uses di_nextents_hi field */ > > #define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT) > > #define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT) > > #define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT) > > +#define XFS_DIFLAG2_47BIT_NEXTENTS (1 << XFS_DIFLAG2_47BIT_NEXTENTS_BIT) > > > > #define XFS_DIFLAG2_ANY \ > > - (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE) > > + (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \ > > + XFS_DIFLAG2_47BIT_NEXTENTS) > > > > /* > > * Inode number format: > > diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c > > index 6f84ea85fdd8..8b89fe080f70 100644 > > --- a/fs/xfs/libxfs/xfs_inode_buf.c > > +++ b/fs/xfs/libxfs/xfs_inode_buf.c > > @@ -307,7 +307,8 @@ xfs_inode_to_disk( > > to->di_size = cpu_to_be64(from->di_size); > > to->di_nblocks = cpu_to_be64(from->di_nblocks); > > to->di_extsize = cpu_to_be32(from->di_extsize); > > - to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df)); > > + to->di_nextents_lo = cpu_to_be32(xfs_ifork_nextents(&ip->i_df) & > > + 0xffffffffU); > > to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp)); > > to->di_forkoff = from->di_forkoff; > > to->di_aformat = xfs_ifork_format(ip->i_afp); > > @@ 
-322,6 +323,10 @@ xfs_inode_to_disk( > > to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.tv_nsec); > > to->di_flags2 = cpu_to_be64(from->di_flags2); > > to->di_cowextsize = cpu_to_be32(from->di_cowextsize); > > + if (from->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) > > + to->di_nextents_hi > > + = cpu_to_be32(xfs_ifork_nextents(&ip->i_df) > > + >> 32); > > /me kinda hates the indentation here, would a convenience variable > reduce the amount of linewrapping here? I will use a variable here as you have suggested. > > Oh, right, we're in a new epoch now; just go past 80 columns. > > > to->di_ino = cpu_to_be64(ip->i_ino); > > to->di_lsn = cpu_to_be64(lsn); > > memset(to->di_pad2, 0, sizeof(to->di_pad2)); > > @@ -360,7 +365,7 @@ xfs_log_dinode_to_disk( > > to->di_size = cpu_to_be64(from->di_size); > > to->di_nblocks = cpu_to_be64(from->di_nblocks); > > to->di_extsize = cpu_to_be32(from->di_extsize); > > - to->di_nextents = cpu_to_be32(from->di_nextents); > > + to->di_nextents_lo = cpu_to_be32(from->di_nextents_lo); > > to->di_anextents = cpu_to_be16(from->di_anextents); > > to->di_forkoff = from->di_forkoff; > > to->di_aformat = from->di_aformat; > > @@ -375,6 +380,9 @@ xfs_log_dinode_to_disk( > > to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); > > to->di_flags2 = cpu_to_be64(from->di_flags2); > > to->di_cowextsize = cpu_to_be32(from->di_cowextsize); > > + if (from->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) > > + to->di_nextents_hi = > > + cpu_to_be32(from->di_nextents_hi); > > to->di_ino = cpu_to_be64(from->di_ino); > > to->di_lsn = cpu_to_be64(from->di_lsn); > > memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); > > @@ -391,7 +399,9 @@ xfs_dinode_verify_fork( > > struct xfs_mount *mp, > > int whichfork) > > { > > - uint32_t di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork); > > + xfs_extnum_t di_nextents; > > + > > + di_nextents = xfs_dfork_nextents(&mp->m_sb, dip, whichfork); > > > > switch (XFS_DFORK_FORMAT(dip, whichfork)) { > > case 
XFS_DINODE_FMT_LOCAL: > > @@ -462,6 +472,8 @@ xfs_dinode_verify( > > uint16_t flags; > > uint64_t flags2; > > uint64_t di_size; > > + xfs_extnum_t nextents; > > + int64_t nblocks; > > > > if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) > > return __this_address; > > @@ -492,10 +504,12 @@ xfs_dinode_verify( > > if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) > > return __this_address; > > > > + nextents = xfs_dfork_nextents(&mp->m_sb, dip, XFS_DATA_FORK); > > + nextents += xfs_dfork_nextents(&mp->m_sb, dip, XFS_ATTR_FORK); > > + nblocks = be64_to_cpu(dip->di_nblocks); > > + > > /* Fork checks carried over from xfs_iformat_fork */ > > - if (mode && > > - be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) > > > - be64_to_cpu(dip->di_nblocks)) > > + if (mode && nextents > nblocks) > > return __this_address; > > > > if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) > > @@ -716,3 +730,23 @@ xfs_inode_validate_cowextsize( > > > > return NULL; > > } > > + > > +xfs_extnum_t > > +xfs_dfork_nextents( > > + struct xfs_sb *sbp, > > + struct xfs_dinode *dip, > > + int whichfork) > > +{ > > + xfs_extnum_t nextents; > > + > > + if (whichfork == XFS_DATA_FORK) { > > + nextents = be32_to_cpu(dip->di_nextents_lo); > > + if (xfs_sb_version_has_v3inode(sbp) > > + && (dip->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS)) > > Please don't align the second line of the if test with the if body. > > Or maybe just create a "xfs_inode_has_big_dfork" helper to encapsulate > this, like we do for reflink/hascow/realtime inodes. Ok. I will follow the style used for reflink inodes. > > > + nextents |= (u64)(be32_to_cpu(dip->di_nextents_hi)) > > + << 32; > > + return nextents; > > + } else { > > + return be16_to_cpu(dip->di_anextents); > > I suspect you could reduce the indenting here by inverting the logic, > e.g. 
> > if (attr fork) > return be16_to_cpu(anextents); > > nextents = be32_to_cpu(nextents_lo); > if (xfs_inode_has_big_dfork()) > nextents += be32_to_cpu(nextents_hi); > return nextents; > The "else" part (i.e. attr fork) gets expanded in the next patch to contain code similar to the data fork. I will have to introduce the "if/else" branch logic once again in that patch. > > + } > > +} > > diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h > > index 865ac493c72a..4583db53b933 100644 > > --- a/fs/xfs/libxfs/xfs_inode_buf.h > > +++ b/fs/xfs/libxfs/xfs_inode_buf.h > > @@ -65,5 +65,7 @@ xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp, > > xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp, > > uint32_t cowextsize, uint16_t mode, uint16_t flags, > > uint64_t flags2); > > +xfs_extnum_t xfs_dfork_nextents(struct xfs_sb *sbp, struct xfs_dinode *dip, > > + int whichfork); > > > > #endif /* __XFS_INODE_BUF_H__ */ > > diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c > > index 3bf5a2c391bd..ec682e2d5bcb 100644 > > --- a/fs/xfs/libxfs/xfs_inode_fork.c > > +++ b/fs/xfs/libxfs/xfs_inode_fork.c > > @@ -10,6 +10,7 @@ > > #include "xfs_format.h" > > #include "xfs_log_format.h" > > #include "xfs_trans_resv.h" > > +#include "xfs_sb.h" > > #include "xfs_mount.h" > > #include "xfs_inode.h" > > #include "xfs_trans.h" > > @@ -103,21 +104,22 @@ xfs_iformat_extents( > > int whichfork) > > { > > struct xfs_mount *mp = ip->i_mount; > > + struct xfs_sb *sb = &mp->m_sb; > > struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); > > + xfs_extnum_t nex = xfs_dfork_nextents(sb, dip, whichfork); > > int state = xfs_bmap_fork_to_state(whichfork); > > - int nex = XFS_DFORK_NEXTENTS(dip, whichfork); > > int size = nex * sizeof(xfs_bmbt_rec_t); > > struct xfs_iext_cursor icur; > > struct xfs_bmbt_rec *dp; > > struct xfs_bmbt_irec new; > > - int i; > > + xfs_extnum_t i; > > > > /* > > * If the number of extents is 
unreasonable, then something is wrong and > > * we just bail out rather than crash in kmem_alloc() or memcpy() below. > > */ > > if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) { > > - xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", > > + xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %llu).", > > (unsigned long long) ip->i_ino, nex); > > xfs_inode_verifier_error(ip, -EFSCORRUPTED, > > "xfs_iformat_extents(1)", dip, sizeof(*dip), > > @@ -233,7 +235,11 @@ xfs_iformat_data_fork( > > * depend on it. > > */ > > ip->i_df.if_format = dip->di_format; > > - ip->i_df.if_nextents = be32_to_cpu(dip->di_nextents); > > + ip->i_df.if_nextents = be32_to_cpu(dip->di_nextents_lo); > > + if (ip->i_d.di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) > > + ip->i_df.if_nextents |= > > + ((u64)(be32_to_cpu(dip->di_nextents_hi)) << 32); > > + > > > > switch (inode->i_mode & S_IFMT) { > > case S_IFIFO: > > @@ -729,31 +735,73 @@ xfs_ifork_verify_local_attr( > > return 0; > > } > > > > +static int > > +xfs_next_set_data( > > + struct xfs_trans *tp, > > + struct xfs_inode *ip, > > + struct xfs_ifork *ifp, > > + int delta) > > +{ > > + struct xfs_mount *mp = ip->i_mount; > > + xfs_extnum_t nr_exts; > > + > > + nr_exts = ifp->if_nextents + delta; > > + > > + if ((delta > 0 && nr_exts > MAXEXTNUM) > > + || (delta < 0 && nr_exts > ifp->if_nextents)) > > + return -EOVERFLOW; > > + > > + if (ifp->if_nextents <= MAXEXTNUM31BIT && > > + nr_exts > MAXEXTNUM31BIT && > > + !(ip->i_d.di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) && > > + xfs_sb_version_has_v3inode(&mp->m_sb)) { > > + if (!xfs_sb_version_has47bitext(&mp->m_sb)) { > > Urk. Again, don't indent the if test logic and the if body statements > to the same level. I am sorry. I will fixup the indentation issues. 
> > > + bool log_sb = false; > > + > > + spin_lock(&mp->m_sb_lock); > > + if (!xfs_sb_version_has47bitext(&mp->m_sb)) { > > + xfs_sb_version_add47bitext(&mp->m_sb); > > + log_sb = true; > > + } > > + spin_unlock(&mp->m_sb_lock); > > + > > + if (log_sb) > > + xfs_log_sb(tp); > > + } > > Hm, dynamic filesystem upgrade. This probably ought to log something to > dmesg about the upgrade. It might also be a better to make this a > separate helper so that it's not triply-indented. Ok. I will implement that. > > > + > > + ip->i_d.di_flags2 |= XFS_DIFLAG2_47BIT_NEXTENTS; > > + } > > + > > + ifp->if_nextents = nr_exts; > > + > > + return 0; > > +} > > + > > int > > xfs_next_set( > > + struct xfs_trans *tp, > > struct xfs_inode *ip, > > int whichfork, > > int delta) > > { > > struct xfs_ifork *ifp; > > int64_t nr_exts; > > - int64_t max_exts; > > + int error = 0; > > > > ifp = XFS_IFORK_PTR(ip, whichfork); > > > > - if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) > > - max_exts = MAXEXTNUM; > > - else if (whichfork == XFS_ATTR_FORK) > > - max_exts = MAXAEXTNUM; > > - else > > - ASSERT(0); > > - > > - nr_exts = ifp->if_nextents + delta; > > - if ((delta > 0 && nr_exts > max_exts) > > - || (delta < 0 && nr_exts < 0)) > > - return -EOVERFLOW; > > + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) { > > + error = xfs_next_set_data(tp, ip, ifp, delta); > > + } else if (whichfork == XFS_ATTR_FORK) { > > + nr_exts = ifp->if_nextents + delta; > > + if ((delta > 0 && nr_exts > MAXAEXTNUM) > > + || (delta < 0 && nr_exts < 0)) > > + return -EOVERFLOW; > > > > - ifp->if_nextents = nr_exts; > > + ifp->if_nextents = nr_exts; > > + } else { > > + ASSERT(0); > > + } > > > > - return 0; > > + return error; > > } > > diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h > > index a84ae42ace79..c74fa6371cc8 100644 > > --- a/fs/xfs/libxfs/xfs_inode_fork.h > > +++ b/fs/xfs/libxfs/xfs_inode_fork.h > > @@ -173,5 +173,6 @@ extern void 
xfs_ifork_init_cow(struct xfs_inode *ip); > > int xfs_ifork_verify_local_data(struct xfs_inode *ip); > > int xfs_ifork_verify_local_attr(struct xfs_inode *ip); > > > > -int xfs_next_set(struct xfs_inode *ip, int whichfork, int delta); > > +int xfs_next_set(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, > > + int delta); > > #endif /* __XFS_INODE_FORK_H__ */ > > diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h > > index e3400c9c71cd..879aadff7692 100644 > > --- a/fs/xfs/libxfs/xfs_log_format.h > > +++ b/fs/xfs/libxfs/xfs_log_format.h > > @@ -396,7 +396,7 @@ struct xfs_log_dinode { > > xfs_fsize_t di_size; /* number of bytes in file */ > > xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */ > > xfs_extlen_t di_extsize; /* basic/minimum extent size for file */ > > - xfs_extnum_t di_nextents; /* number of extents in data fork */ > > + uint32_t di_nextents_lo; /* number of extents in data fork */ > > xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/ > > uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */ > > int8_t di_aformat; /* format of attr fork's data */ > > @@ -414,7 +414,8 @@ struct xfs_log_dinode { > > xfs_lsn_t di_lsn; /* flush sequence */ > > uint64_t di_flags2; /* more random flags */ > > uint32_t di_cowextsize; /* basic cow extent size for file */ > > - uint8_t di_pad2[12]; /* more padding for future expansion */ > > + uint32_t di_nextents_hi; > > + uint8_t di_pad2[8]; /* more padding for future expansion */ > > > > /* fields only written to during inode creation */ > > xfs_ictimestamp_t di_crtime; /* time created */ > > diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h > > index 0a3041ad5bec..c68ff2178976 100644 > > --- a/fs/xfs/libxfs/xfs_types.h > > +++ b/fs/xfs/libxfs/xfs_types.h > > @@ -12,7 +12,7 @@ typedef uint32_t xfs_agblock_t; /* blockno in alloc. 
group */ > > typedef uint32_t xfs_agino_t; /* inode # within allocation grp */ > > typedef uint32_t xfs_extlen_t; /* extent length in blocks */ > > typedef uint32_t xfs_agnumber_t; /* allocation group number */ > > -typedef int32_t xfs_extnum_t; /* # of extents in a file */ > > +typedef uint64_t xfs_extnum_t; /* # of extents in a file */ > > typedef int16_t xfs_aextnum_t; /* # extents in an attribute fork */ > > typedef int64_t xfs_fsize_t; /* bytes in a file */ > > typedef uint64_t xfs_ufsize_t; /* unsigned bytes in a file */ > > @@ -59,7 +59,8 @@ typedef void * xfs_failaddr_t; > > * Max values for extlen, extnum, aextnum. > > */ > > #define MAXEXTLEN ((xfs_extlen_t)0x001fffff) /* 21 bits */ > > -#define MAXEXTNUM ((xfs_extnum_t)0x7fffffff) /* signed int */ > > +#define MAXEXTNUM31BIT ((xfs_extnum_t)0x7fffffff) /* 31 bits */ > > +#define MAXEXTNUM ((xfs_extnum_t)0x7fffffffffff) /* 47 bits */ > > #define MAXDIREXTNUM ((xfs_extnum_t)0x7ffffff) /* 27 bits */ > > #define MAXAEXTNUM ((xfs_aextnum_t)0x7fff) /* signed short */ > > > > diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c > > index 6d483ab29e63..be41fd242ff2 100644 > > --- a/fs/xfs/scrub/inode.c > > +++ b/fs/xfs/scrub/inode.c > > @@ -205,8 +205,8 @@ xchk_dinode( > > struct xfs_mount *mp = sc->mp; > > size_t fork_recs; > > unsigned long long isize; > > + xfs_extnum_t nextents; > > uint64_t flags2; > > - uint32_t nextents; > > uint16_t flags; > > uint16_t mode; > > > > @@ -354,7 +354,7 @@ xchk_dinode( > > xchk_inode_extsize(sc, dip, ino, mode, flags); > > > > /* di_nextents */ > > - nextents = be32_to_cpu(dip->di_nextents); > > + nextents = xfs_dfork_nextents(&mp->m_sb, dip, XFS_DATA_FORK); > > fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); > > switch (dip->di_format) { > > case XFS_DINODE_FMT_EXTENTS: > > @@ -464,6 +464,7 @@ xchk_inode_xref_bmap( > > struct xfs_scrub *sc, > > struct xfs_dinode *dip) > > { > > + xfs_mount_t *mp = sc->mp; > > struct xfs_mount. 
The structure typedefs usages are deprecated and > we're trying to get rid of them (slowly). Yes, I missed out on this one. I will fix this up. > > --D > > > xfs_extnum_t nextents; > > xfs_filblks_t count; > > xfs_filblks_t acount; > > @@ -477,14 +478,14 @@ xchk_inode_xref_bmap( > > &nextents, &count); > > if (!xchk_should_check_xref(sc, &error, NULL)) > > return; > > - if (nextents < be32_to_cpu(dip->di_nextents)) > > + if (nextents < xfs_dfork_nextents(&mp->m_sb, dip, XFS_DATA_FORK)) > > xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); > > > > error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, > > &nextents, &acount); > > if (!xchk_should_check_xref(sc, &error, NULL)) > > return; > > - if (nextents != be16_to_cpu(dip->di_anextents)) > > + if (nextents != xfs_dfork_nextents(&mp->m_sb, dip, XFS_ATTR_FORK)) > > xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); > > > > /* Check nblocks against the inode. */ > > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c > > index 64f5f9a440ae..4418a66cf6d6 100644 > > --- a/fs/xfs/xfs_inode.c > > +++ b/fs/xfs/xfs_inode.c > > @@ -3748,7 +3748,7 @@ xfs_iflush_int( > > ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) { > > xfs_alert_tag(mp, XFS_PTAG_IFLUSH, > > "%s: detected corrupt incore inode %Lu, " > > - "total extents = %d, nblocks = %Ld, ptr "PTR_FMT, > > + "total extents = %llu, nblocks = %Ld, ptr "PTR_FMT, > > __func__, ip->i_ino, > > ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp), > > ip->i_d.di_nblocks, ip); > > @@ -3785,6 +3785,10 @@ xfs_iflush_int( > > xfs_ifork_verify_local_attr(ip)) > > goto flush_out; > > > > + if (!(ip->i_d.di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) > > + && xfs_sb_version_has47bitext(&mp->m_sb)) > > + ip->i_d.di_flags2 |= XFS_DIFLAG2_47BIT_NEXTENTS; > > + > > /* > > * Copy the dirty parts of the inode into the on-disk inode. 
We always > > * copy out the core of the inode, because if the inode is dirty at all > > diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c > > index ba47bf65b772..6f27ac7c8631 100644 > > --- a/fs/xfs/xfs_inode_item.c > > +++ b/fs/xfs/xfs_inode_item.c > > @@ -326,7 +326,7 @@ xfs_inode_to_log_dinode( > > to->di_size = from->di_size; > > to->di_nblocks = from->di_nblocks; > > to->di_extsize = from->di_extsize; > > - to->di_nextents = xfs_ifork_nextents(&ip->i_df); > > + to->di_nextents_lo = xfs_ifork_nextents(&ip->i_df) & 0xffffffffU; > > to->di_anextents = xfs_ifork_nextents(ip->i_afp); > > to->di_forkoff = from->di_forkoff; > > to->di_aformat = xfs_ifork_format(ip->i_afp); > > @@ -344,6 +344,9 @@ xfs_inode_to_log_dinode( > > to->di_crtime.t_nsec = from->di_crtime.tv_nsec; > > to->di_flags2 = from->di_flags2; > > to->di_cowextsize = from->di_cowextsize; > > + if (from->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) > > + to->di_nextents_hi = > > + xfs_ifork_nextents(&ip->i_df) >> 32; > > to->di_ino = ip->i_ino; > > to->di_lsn = lsn; > > memset(to->di_pad2, 0, sizeof(to->di_pad2)); > > diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c > > index 10ef5ddf5429..8d64b861fb66 100644 > > --- a/fs/xfs/xfs_inode_item_recover.c > > +++ b/fs/xfs/xfs_inode_item_recover.c > > @@ -134,6 +134,7 @@ xlog_recover_inode_commit_pass2( > > struct xfs_log_dinode *ldip; > > uint isize; > > int need_free = 0; > > + xfs_extnum_t nextents; > > > > if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { > > in_f = item->ri_buf[0].i_addr; > > @@ -255,16 +256,23 @@ xlog_recover_inode_commit_pass2( > > goto out_release; > > } > > } > > - if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){ > > + > > + nextents = ldip->di_nextents_lo; > > + if (xfs_sb_version_has_v3inode(&mp->m_sb) && > > + ldip->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) > > + nextents |= ((u64)(ldip->di_nextents_hi) << 32); > > + > > + nextents += 
ldip->di_anextents; > > + > > + if (unlikely(nextents > ldip->di_nblocks)) { > > XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", > > XFS_ERRLEVEL_LOW, mp, ldip, > > sizeof(*ldip)); > > xfs_alert(mp, > > "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", " > > - "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld", > > + "dino bp "PTR_FMT", ino %Ld, total extents = %llu, nblocks = %Ld", > > __func__, item, dip, bp, in_f->ilf_ino, > > - ldip->di_nextents + ldip->di_anextents, > > - ldip->di_nblocks); > > + nextents, ldip->di_nblocks); > > error = -EFSCORRUPTED; > > goto out_release; > > } >
On Sat, Jun 06, 2020 at 01:57:44PM +0530, Chandan Babu R wrote: > This commit extends the per-inode data extent counter to 47 bits. The > length of 47-bits was chosen because, > Maximum file size = 2^63. > Maximum extent count when using 64k block size = 2^63 / 2^16 = 2^47. What is the use case for a large number of extents? I'm not sure why we'd want to bother, but if there is a good reason it really should be documented here.
On Friday 19 June 2020 8:08:46 PM IST Christoph Hellwig wrote: > On Sat, Jun 06, 2020 at 01:57:44PM +0530, Chandan Babu R wrote: > > This commit extends the per-inode data extent counter to 47 bits. The > > length of 47-bits was chosen because, > > Maximum file size = 2^63. > > Maximum extent count when using 64k block size = 2^63 / 2^16 = 2^47. > > What is the use case for a large number of extents? I'm not sure why > we'd want to bother, but if there is a good reason it really should > be documented here. > > Late last year, Dave had pointed me to the commit "xfs: fix inode fork extent count overflow" (3f8a4f1d876d3e3e49e50b0396eaffcc4ba71b08) where the following scenario is described: Fallocate 40TiB of disk space and then punch out alternate fs blocks. Assuming 4k block size, this would give 40TiB / 4k / 2 = ~5 billion extents. This won't fit into an unsigned 32-bit field, which can hold a maximum value of ~4 billion. Dave mentioned that we will go over the 32-bit extent counter limit soon. Hence this patch extends the on-disk data fork extent counter to a 64-bit field. In my next version of this patchset, I will add the technical part of the above description to the patch. Sorry for missing that out.
On Tue, Jun 09, 2020 at 07:53:05PM +0530, Chandan Babu R wrote: > On Monday 8 June 2020 10:44:10 PM IST Darrick J. Wong wrote: > > On Sat, Jun 06, 2020 at 01:57:44PM +0530, Chandan Babu R wrote: > > > This commit extends the per-inode data extent counter to 47 bits. The > > > length of 47-bits was chosen because, > > > Maximum file size = 2^63. > > > Maximum extent count when using 64k block size = 2^63 / 2^16 = 2^47. > > > > > > The following changes are made to accomplish this, > > > 1. A new ro-compat superblock flag to prevent older kernels from > > > mounting the filesystem in read-write mode. This flag is set for the > > > first time when an inode would end up having more than 2^31 extents. > > > 3. Carve out a new 32-bit field from xfs_dinode->di_pad2[]. This field > > > holds the most significant 15 bits of the data extent counter. > > > > On a 1k block V5 fs, the maximum extent count is 2^(63-10) = 2^53. > > > > If you're going to allocate 32 bits of space from di_pad2 to expand the > > data fork's nextents, let's use the entire bitspace. > > But 2^53 extents will be beyond the limit of number of extents possible for a > 64k blocksized filesystem? That is true, but what about 4k block filesystems? What about 1k block filesystems? (Yeah, sorry I forgot to reply) --D > > > > > 2. A new inode->di_flags2 flag to indicate that the newly added field > > > contains valid data. This flag is set when one of the following two > > > conditions are met, > > > - When the inode is about to have more than 2^31 extents. > > > - When flushing the incore inode (See xfs_iflush_int()), if > > > the superblock ro-compat flag is already set. 
> > > > > > Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> > > > --- > > > fs/xfs/libxfs/xfs_bmap.c | 40 ++++++++-------- > > > fs/xfs/libxfs/xfs_format.h | 30 ++++++++---- > > > fs/xfs/libxfs/xfs_inode_buf.c | 46 +++++++++++++++--- > > > fs/xfs/libxfs/xfs_inode_buf.h | 2 + > > > fs/xfs/libxfs/xfs_inode_fork.c | 84 ++++++++++++++++++++++++++------- > > > fs/xfs/libxfs/xfs_inode_fork.h | 3 +- > > > fs/xfs/libxfs/xfs_log_format.h | 5 +- > > > fs/xfs/libxfs/xfs_types.h | 5 +- > > > fs/xfs/scrub/inode.c | 9 ++-- > > > fs/xfs/xfs_inode.c | 6 ++- > > > fs/xfs/xfs_inode_item.c | 5 +- > > > fs/xfs/xfs_inode_item_recover.c | 16 +++++-- > > > 12 files changed, 184 insertions(+), 67 deletions(-) > > > > > > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c > > > index f75b70ae7b1f..73e552678adc 100644 > > > --- a/fs/xfs/libxfs/xfs_bmap.c > > > +++ b/fs/xfs/libxfs/xfs_bmap.c > > > @@ -53,9 +53,9 @@ xfs_bmap_compute_maxlevels( > > > int whichfork, /* data or attr fork */ > > > int dir_bmbt) /* Dir or non-dir data fork */ > > > { > > > + uint64_t maxleafents; /* max leaf entries possible */ > > > int level; /* btree level */ > > > uint maxblocks; /* max blocks at this level */ > > > - uint maxleafents; /* max leaf entries possible */ > > > int maxrootrecs; /* max records in root block */ > > > int minleafrecs; /* min records in leaf block */ > > > int minnoderecs; /* min records in node block */ > > > @@ -477,7 +477,7 @@ xfs_bmap_check_leaf_extents( > > > if (bp_release) > > > xfs_trans_brelse(NULL, bp); > > > error_norelse: > > > - xfs_warn(mp, "%s: BAD after btree leaves for %d extents", > > > + xfs_warn(mp, "%s: BAD after btree leaves for %llu extents", > > > __func__, i); > > > xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__); > > > xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); > > > @@ -918,7 +918,7 @@ xfs_bmap_local_to_extents( > > > xfs_iext_first(ifp, &icur); > > > xfs_iext_insert(ip, &icur, &rec, 0); > > > > > > - error = 
xfs_next_set(ip, whichfork, 1); > > > + error = xfs_next_set(tp, ip, whichfork, 1); > > > if (error) > > > goto done; > > > > > > @@ -1610,7 +1610,7 @@ xfs_bmap_add_extent_delay_real( > > > xfs_iext_prev(ifp, &bma->icur); > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT); > > > > > > - error = xfs_next_set(bma->ip, whichfork, -1); > > > + error = xfs_next_set(bma->tp, bma->ip, whichfork, -1); > > > if (error) > > > goto done; > > > > > > @@ -1717,7 +1717,7 @@ xfs_bmap_add_extent_delay_real( > > > PREV.br_state = new->br_state; > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); > > > > > > - error = xfs_next_set(bma->ip, whichfork, 1); > > > + error = xfs_next_set(bma->tp, bma->ip, whichfork, 1); > > > if (error) > > > goto done; > > > > > > @@ -1786,7 +1786,7 @@ xfs_bmap_add_extent_delay_real( > > > */ > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > > > > > > - error = xfs_next_set(bma->ip, whichfork, 1); > > > + error = xfs_next_set(bma->tp, bma->ip, whichfork, 1); > > > if (error) > > > goto done; > > > > > > @@ -1876,7 +1876,7 @@ xfs_bmap_add_extent_delay_real( > > > */ > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > > > > > > - error = xfs_next_set(bma->ip, whichfork, 1); > > > + error = xfs_next_set(bma->tp, bma->ip, whichfork, 1); > > > if (error) > > > goto done; > > > > > > @@ -1965,7 +1965,7 @@ xfs_bmap_add_extent_delay_real( > > > xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state); > > > xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state); > > > > > > - error = xfs_next_set(bma->ip, whichfork, 1); > > > + error = xfs_next_set(bma->tp, bma->ip, whichfork, 1); > > > if (error) > > > goto done; > > > > > > @@ -2172,7 +2172,7 @@ xfs_bmap_add_extent_unwritten_real( > > > xfs_iext_prev(ifp, icur); > > > xfs_iext_update_extent(ip, state, icur, &LEFT); > > > > > > - error = xfs_next_set(ip, whichfork, -2); > > > + error = xfs_next_set(tp, ip, whichfork, -2); > > > if (error) > > > goto done; > > > > > 
> @@ -2228,7 +2228,7 @@ xfs_bmap_add_extent_unwritten_real( > > > xfs_iext_prev(ifp, icur); > > > xfs_iext_update_extent(ip, state, icur, &LEFT); > > > > > > - error = xfs_next_set(ip, whichfork, -1); > > > + error = xfs_next_set(tp, ip, whichfork, -1); > > > if (error) > > > goto done; > > > > > > @@ -2274,7 +2274,7 @@ xfs_bmap_add_extent_unwritten_real( > > > xfs_iext_prev(ifp, icur); > > > xfs_iext_update_extent(ip, state, icur, &PREV); > > > > > > - error = xfs_next_set(ip, whichfork, -1); > > > + error = xfs_next_set(tp, ip, whichfork, -1); > > > if (error) > > > goto done; > > > > > > @@ -2385,7 +2385,7 @@ xfs_bmap_add_extent_unwritten_real( > > > xfs_iext_update_extent(ip, state, icur, &PREV); > > > xfs_iext_insert(ip, icur, new, state); > > > > > > - error = xfs_next_set(ip, whichfork, 1); > > > + error = xfs_next_set(tp, ip, whichfork, 1); > > > if (error) > > > goto done; > > > > > > @@ -2464,7 +2464,7 @@ xfs_bmap_add_extent_unwritten_real( > > > xfs_iext_next(ifp, icur); > > > xfs_iext_insert(ip, icur, new, state); > > > > > > - error = xfs_next_set(ip, whichfork, 1); > > > + error = xfs_next_set(tp, ip, whichfork, 1); > > > if (error) > > > goto done; > > > > > > @@ -2519,7 +2519,7 @@ xfs_bmap_add_extent_unwritten_real( > > > xfs_iext_insert(ip, icur, &r[1], state); > > > xfs_iext_insert(ip, icur, &r[0], state); > > > > > > - error = xfs_next_set(ip, whichfork, 2); > > > + error = xfs_next_set(tp, ip, whichfork, 2); > > > if (error) > > > goto done; > > > > > > @@ -2838,7 +2838,7 @@ xfs_bmap_add_extent_hole_real( > > > xfs_iext_prev(ifp, icur); > > > xfs_iext_update_extent(ip, state, icur, &left); > > > > > > - error = xfs_next_set(ip, whichfork, -1); > > > + error = xfs_next_set(tp, ip, whichfork, -1); > > > if (error) > > > goto done; > > > > > > @@ -2940,7 +2940,7 @@ xfs_bmap_add_extent_hole_real( > > > */ > > > xfs_iext_insert(ip, icur, new, state); > > > > > > - error = xfs_next_set(ip, whichfork, 1); > > > + error = xfs_next_set(tp, ip, whichfork, 
1); > > > if (error) > > > goto done; > > > > > > @@ -5140,7 +5140,7 @@ xfs_bmap_del_extent_real( > > > xfs_iext_remove(ip, icur, state); > > > xfs_iext_prev(ifp, icur); > > > > > > - error = xfs_next_set(ip, whichfork, -1); > > > + error = xfs_next_set(tp, ip, whichfork, -1); > > > if (error) > > > goto done; > > > > > > @@ -5252,7 +5252,7 @@ xfs_bmap_del_extent_real( > > > } else > > > flags |= xfs_ilog_fext(whichfork); > > > > > > - error = xfs_next_set(ip, whichfork, 1); > > > + error = xfs_next_set(tp, ip, whichfork, 1); > > > if (error) > > > goto done; > > > > > > @@ -5722,7 +5722,7 @@ xfs_bmse_merge( > > > * Update the on-disk extent count, the btree if necessary and log the > > > * inode. > > > */ > > > - error = xfs_next_set(ip, whichfork, -1); > > > + error = xfs_next_set(tp, ip, whichfork, -1); > > > if (error) > > > goto done; > > > > > > @@ -6113,7 +6113,7 @@ xfs_bmap_split_extent( > > > xfs_iext_next(ifp, &icur); > > > xfs_iext_insert(ip, &icur, &new, 0); > > > > > > - error = xfs_next_set(ip, whichfork, 1); > > > + error = xfs_next_set(tp, ip, whichfork, 1); > > > if (error) > > > goto del_cursor; > > > > > > diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h > > > index b42a52bfa1e9..91bee33aa988 100644 > > > --- a/fs/xfs/libxfs/xfs_format.h > > > +++ b/fs/xfs/libxfs/xfs_format.h > > > @@ -449,10 +449,12 @@ xfs_sb_has_compat_feature( > > > #define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */ > > > #define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */ > > > #define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */ > > > +#define XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR (1 << 3) /* 47bit data extents */ > > > > I wonder if we could come up with a better name for this... > > > > DFORK_EXTENTHI > > > > Hmm... > > > > BIG_DFORK > > > > Hmmm... > > > > ULTRAFRAG > > > > There we go. "XFS with UltraFrag, part of this complete g@m3r t00lk1t." ;) > > > > ... 
> > > > (What do you think of the second suggestion?) > > I like the name DFORK_EXTENTHI since it signifies that we are now using the > "_HI" field of the extent counter and it can also be used to convey the same > for the attr extent counter as well. Thanks for the suggestions. > > > > > > #define XFS_SB_FEAT_RO_COMPAT_ALL \ > > > (XFS_SB_FEAT_RO_COMPAT_FINOBT | \ > > > XFS_SB_FEAT_RO_COMPAT_RMAPBT | \ > > > - XFS_SB_FEAT_RO_COMPAT_REFLINK) > > > + XFS_SB_FEAT_RO_COMPAT_REFLINK | \ > > > + XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR) > > > #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL > > > static inline bool > > > xfs_sb_has_ro_compat_feature( > > > @@ -563,6 +565,18 @@ static inline bool xfs_sb_version_hasreflink(struct xfs_sb *sbp) > > > (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK); > > > } > > > > > > +static inline bool xfs_sb_version_has47bitext(struct xfs_sb *sbp) > > > +{ > > > + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && > > > + (sbp->sb_features_ro_compat & > > > + XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR); > > > +} > > > + > > > +static inline void xfs_sb_version_add47bitext(struct xfs_sb *sbp) > > > +{ > > > + sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR; > > > +} > > > + > > > /* > > > * end of superblock version macros > > > */ > > > @@ -873,7 +887,7 @@ typedef struct xfs_dinode { > > > __be64 di_size; /* number of bytes in file */ > > > __be64 di_nblocks; /* # of direct & btree blocks used */ > > > __be32 di_extsize; /* basic/minimum extent size for file */ > > > - __be32 di_nextents; /* number of extents in data fork */ > > > + __be32 di_nextents_lo; /* number of extents in data fork */ > > > __be16 di_anextents; /* number of extents in attribute fork*/ > > > __u8 di_forkoff; /* attr fork offs, <<3 for 64b align */ > > > __s8 di_aformat; /* format of attr fork's data */ > > > @@ -891,7 +905,8 @@ typedef struct xfs_dinode { > > > __be64 di_lsn; /* flush sequence */ > > > __be64 di_flags2; 
/* more random flags */ > > > __be32 di_cowextsize; /* basic cow extent size for file */ > > > - __u8 di_pad2[12]; /* more padding for future expansion */ > > > + __be32 di_nextents_hi; > > > + __u8 di_pad2[8]; /* more padding for future expansion */ > > > > > > /* fields only written to during inode creation */ > > > xfs_timestamp_t di_crtime; /* time created */ > > > @@ -992,10 +1007,6 @@ enum xfs_dinode_fmt { > > > ((w) == XFS_DATA_FORK ? \ > > > (dip)->di_format : \ > > > (dip)->di_aformat) > > > -#define XFS_DFORK_NEXTENTS(dip,w) \ > > > - ((w) == XFS_DATA_FORK ? \ > > > - be32_to_cpu((dip)->di_nextents) : \ > > > - be16_to_cpu((dip)->di_anextents)) > > > > > > /* > > > * For block and character special files the 32bit dev_t is stored at the > > > @@ -1061,12 +1072,15 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) > > > #define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */ > > > #define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */ > > > #define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */ > > > +#define XFS_DIFLAG2_47BIT_NEXTENTS_BIT 3 /* Uses di_nextents_hi field */ > > > #define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT) > > > #define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT) > > > #define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT) > > > +#define XFS_DIFLAG2_47BIT_NEXTENTS (1 << XFS_DIFLAG2_47BIT_NEXTENTS_BIT) > > > > > > #define XFS_DIFLAG2_ANY \ > > > - (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE) > > > + (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \ > > > + XFS_DIFLAG2_47BIT_NEXTENTS) > > > > > > /* > > > * Inode number format: > > > diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c > > > index 6f84ea85fdd8..8b89fe080f70 100644 > > > --- a/fs/xfs/libxfs/xfs_inode_buf.c > > > +++ b/fs/xfs/libxfs/xfs_inode_buf.c > > > @@ -307,7 +307,8 @@ xfs_inode_to_disk( > > > to->di_size = 
cpu_to_be64(from->di_size); > > > to->di_nblocks = cpu_to_be64(from->di_nblocks); > > > to->di_extsize = cpu_to_be32(from->di_extsize); > > > - to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df)); > > > + to->di_nextents_lo = cpu_to_be32(xfs_ifork_nextents(&ip->i_df) & > > > + 0xffffffffU); > > > to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp)); > > > to->di_forkoff = from->di_forkoff; > > > to->di_aformat = xfs_ifork_format(ip->i_afp); > > > @@ -322,6 +323,10 @@ xfs_inode_to_disk( > > > to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.tv_nsec); > > > to->di_flags2 = cpu_to_be64(from->di_flags2); > > > to->di_cowextsize = cpu_to_be32(from->di_cowextsize); > > > + if (from->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) > > > + to->di_nextents_hi > > > + = cpu_to_be32(xfs_ifork_nextents(&ip->i_df) > > > + >> 32); > > > > /me kinda hates the indentation here, would a convenience variable > > reduce the amount of linewrapping here? > > I will use a variable here as you have suggested. > > > > > Oh, right, we're in a new epoch now; just go past 80 columns. 
> > > > > to->di_ino = cpu_to_be64(ip->i_ino); > > > to->di_lsn = cpu_to_be64(lsn); > > > memset(to->di_pad2, 0, sizeof(to->di_pad2)); > > > @@ -360,7 +365,7 @@ xfs_log_dinode_to_disk( > > > to->di_size = cpu_to_be64(from->di_size); > > > to->di_nblocks = cpu_to_be64(from->di_nblocks); > > > to->di_extsize = cpu_to_be32(from->di_extsize); > > > - to->di_nextents = cpu_to_be32(from->di_nextents); > > > + to->di_nextents_lo = cpu_to_be32(from->di_nextents_lo); > > > to->di_anextents = cpu_to_be16(from->di_anextents); > > > to->di_forkoff = from->di_forkoff; > > > to->di_aformat = from->di_aformat; > > > @@ -375,6 +380,9 @@ xfs_log_dinode_to_disk( > > > to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); > > > to->di_flags2 = cpu_to_be64(from->di_flags2); > > > to->di_cowextsize = cpu_to_be32(from->di_cowextsize); > > > + if (from->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) > > > + to->di_nextents_hi = > > > + cpu_to_be32(from->di_nextents_hi); > > > to->di_ino = cpu_to_be64(from->di_ino); > > > to->di_lsn = cpu_to_be64(from->di_lsn); > > > memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); > > > @@ -391,7 +399,9 @@ xfs_dinode_verify_fork( > > > struct xfs_mount *mp, > > > int whichfork) > > > { > > > - uint32_t di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork); > > > + xfs_extnum_t di_nextents; > > > + > > > + di_nextents = xfs_dfork_nextents(&mp->m_sb, dip, whichfork); > > > > > > switch (XFS_DFORK_FORMAT(dip, whichfork)) { > > > case XFS_DINODE_FMT_LOCAL: > > > @@ -462,6 +472,8 @@ xfs_dinode_verify( > > > uint16_t flags; > > > uint64_t flags2; > > > uint64_t di_size; > > > + xfs_extnum_t nextents; > > > + int64_t nblocks; > > > > > > if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) > > > return __this_address; > > > @@ -492,10 +504,12 @@ xfs_dinode_verify( > > > if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) > > > return __this_address; > > > > > > + nextents = xfs_dfork_nextents(&mp->m_sb, dip, XFS_DATA_FORK); > > > + nextents += 
xfs_dfork_nextents(&mp->m_sb, dip, XFS_ATTR_FORK); > > > + nblocks = be64_to_cpu(dip->di_nblocks); > > > + > > > /* Fork checks carried over from xfs_iformat_fork */ > > > - if (mode && > > > - be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) > > > > - be64_to_cpu(dip->di_nblocks)) > > > + if (mode && nextents > nblocks) > > > return __this_address; > > > > > > if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) > > > @@ -716,3 +730,23 @@ xfs_inode_validate_cowextsize( > > > > > > return NULL; > > > } > > > + > > > +xfs_extnum_t > > > +xfs_dfork_nextents( > > > + struct xfs_sb *sbp, > > > + struct xfs_dinode *dip, > > > + int whichfork) > > > +{ > > > + xfs_extnum_t nextents; > > > + > > > + if (whichfork == XFS_DATA_FORK) { > > > + nextents = be32_to_cpu(dip->di_nextents_lo); > > > + if (xfs_sb_version_has_v3inode(sbp) > > > + && (dip->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS)) > > > > Please don't align the second line of the if test with the if body. > > > > Or maybe just create a "xfs_inode_has_big_dfork" helper to encapsulate > > this, like we do for reflink/hascow/realtime inodes. > > Ok. I will follow the style used for reflink inodes. > > > > > > + nextents |= (u64)(be32_to_cpu(dip->di_nextents_hi)) > > > + << 32; > > > + return nextents; > > > + } else { > > > + return be16_to_cpu(dip->di_anextents); > > > > I suspect you could reduce the indenting here by inverting the logic, > > e.g. > > > > if (attr fork) > > return be16_to_cpu(anextents); > > > > nextents = be32_to_cpu(nextents_lo); > > if (xfs_inode_has_big_dfork()) > > nextents += be32_to_cpu(nextents_hi); > > return nextents; > > > > The "else" part (i.e. attr fork) gets expanded in the next > patch to contain code similar to the data fork. I will have to introduce the > "if/else" branch logic once again in that patch. 
> > > > + } > > > +} > > > diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h > > > index 865ac493c72a..4583db53b933 100644 > > > --- a/fs/xfs/libxfs/xfs_inode_buf.h > > > +++ b/fs/xfs/libxfs/xfs_inode_buf.h > > > @@ -65,5 +65,7 @@ xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp, > > > xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp, > > > uint32_t cowextsize, uint16_t mode, uint16_t flags, > > > uint64_t flags2); > > > +xfs_extnum_t xfs_dfork_nextents(struct xfs_sb *sbp, struct xfs_dinode *dip, > > > + int whichfork); > > > > > > #endif /* __XFS_INODE_BUF_H__ */ > > > diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c > > > index 3bf5a2c391bd..ec682e2d5bcb 100644 > > > --- a/fs/xfs/libxfs/xfs_inode_fork.c > > > +++ b/fs/xfs/libxfs/xfs_inode_fork.c > > > @@ -10,6 +10,7 @@ > > > #include "xfs_format.h" > > > #include "xfs_log_format.h" > > > #include "xfs_trans_resv.h" > > > +#include "xfs_sb.h" > > > #include "xfs_mount.h" > > > #include "xfs_inode.h" > > > #include "xfs_trans.h" > > > @@ -103,21 +104,22 @@ xfs_iformat_extents( > > > int whichfork) > > > { > > > struct xfs_mount *mp = ip->i_mount; > > > + struct xfs_sb *sb = &mp->m_sb; > > > struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); > > > + xfs_extnum_t nex = xfs_dfork_nextents(sb, dip, whichfork); > > > int state = xfs_bmap_fork_to_state(whichfork); > > > - int nex = XFS_DFORK_NEXTENTS(dip, whichfork); > > > int size = nex * sizeof(xfs_bmbt_rec_t); > > > struct xfs_iext_cursor icur; > > > struct xfs_bmbt_rec *dp; > > > struct xfs_bmbt_irec new; > > > - int i; > > > + xfs_extnum_t i; > > > > > > /* > > > * If the number of extents is unreasonable, then something is wrong and > > > * we just bail out rather than crash in kmem_alloc() or memcpy() below. 
> > > */ > > > if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) { > > > - xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", > > > + xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %llu).", > > > (unsigned long long) ip->i_ino, nex); > > > xfs_inode_verifier_error(ip, -EFSCORRUPTED, > > > "xfs_iformat_extents(1)", dip, sizeof(*dip), > > > @@ -233,7 +235,11 @@ xfs_iformat_data_fork( > > > * depend on it. > > > */ > > > ip->i_df.if_format = dip->di_format; > > > - ip->i_df.if_nextents = be32_to_cpu(dip->di_nextents); > > > + ip->i_df.if_nextents = be32_to_cpu(dip->di_nextents_lo); > > > + if (ip->i_d.di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) > > > + ip->i_df.if_nextents |= > > > + ((u64)(be32_to_cpu(dip->di_nextents_hi)) << 32); > > > + > > > > > > switch (inode->i_mode & S_IFMT) { > > > case S_IFIFO: > > > @@ -729,31 +735,73 @@ xfs_ifork_verify_local_attr( > > > return 0; > > > } > > > > > > +static int > > > +xfs_next_set_data( > > > + struct xfs_trans *tp, > > > + struct xfs_inode *ip, > > > + struct xfs_ifork *ifp, > > > + int delta) > > > +{ > > > + struct xfs_mount *mp = ip->i_mount; > > > + xfs_extnum_t nr_exts; > > > + > > > + nr_exts = ifp->if_nextents + delta; > > > + > > > + if ((delta > 0 && nr_exts > MAXEXTNUM) > > > + || (delta < 0 && nr_exts > ifp->if_nextents)) > > > + return -EOVERFLOW; > > > + > > > + if (ifp->if_nextents <= MAXEXTNUM31BIT && > > > + nr_exts > MAXEXTNUM31BIT && > > > + !(ip->i_d.di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) && > > > + xfs_sb_version_has_v3inode(&mp->m_sb)) { > > > + if (!xfs_sb_version_has47bitext(&mp->m_sb)) { > > > > Urk. Again, don't indent the if test logic and the if body statements > > to the same level. > > I am sorry. I will fixup the indentation issues. 
> > > > > > + bool log_sb = false; > > > + > > > + spin_lock(&mp->m_sb_lock); > > > + if (!xfs_sb_version_has47bitext(&mp->m_sb)) { > > > + xfs_sb_version_add47bitext(&mp->m_sb); > > > + log_sb = true; > > > + } > > > + spin_unlock(&mp->m_sb_lock); > > > + > > > + if (log_sb) > > > + xfs_log_sb(tp); > > > + } > > > > Hm, dynamic filesystem upgrade. This probably ought to log something to > > dmesg about the upgrade. It might also be a better to make this a > > separate helper so that it's not triply-indented. > > Ok. I will implement that. > > > > > > + > > > + ip->i_d.di_flags2 |= XFS_DIFLAG2_47BIT_NEXTENTS; > > > + } > > > + > > > + ifp->if_nextents = nr_exts; > > > + > > > + return 0; > > > +} > > > + > > > int > > > xfs_next_set( > > > + struct xfs_trans *tp, > > > struct xfs_inode *ip, > > > int whichfork, > > > int delta) > > > { > > > struct xfs_ifork *ifp; > > > int64_t nr_exts; > > > - int64_t max_exts; > > > + int error = 0; > > > > > > ifp = XFS_IFORK_PTR(ip, whichfork); > > > > > > - if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) > > > - max_exts = MAXEXTNUM; > > > - else if (whichfork == XFS_ATTR_FORK) > > > - max_exts = MAXAEXTNUM; > > > - else > > > - ASSERT(0); > > > - > > > - nr_exts = ifp->if_nextents + delta; > > > - if ((delta > 0 && nr_exts > max_exts) > > > - || (delta < 0 && nr_exts < 0)) > > > - return -EOVERFLOW; > > > + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) { > > > + error = xfs_next_set_data(tp, ip, ifp, delta); > > > + } else if (whichfork == XFS_ATTR_FORK) { > > > + nr_exts = ifp->if_nextents + delta; > > > + if ((delta > 0 && nr_exts > MAXAEXTNUM) > > > + || (delta < 0 && nr_exts < 0)) > > > + return -EOVERFLOW; > > > > > > - ifp->if_nextents = nr_exts; > > > + ifp->if_nextents = nr_exts; > > > + } else { > > > + ASSERT(0); > > > + } > > > > > > - return 0; > > > + return error; > > > } > > > diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h > > > index 
a84ae42ace79..c74fa6371cc8 100644 > > > --- a/fs/xfs/libxfs/xfs_inode_fork.h > > > +++ b/fs/xfs/libxfs/xfs_inode_fork.h > > > @@ -173,5 +173,6 @@ extern void xfs_ifork_init_cow(struct xfs_inode *ip); > > > int xfs_ifork_verify_local_data(struct xfs_inode *ip); > > > int xfs_ifork_verify_local_attr(struct xfs_inode *ip); > > > > > > -int xfs_next_set(struct xfs_inode *ip, int whichfork, int delta); > > > +int xfs_next_set(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, > > > + int delta); > > > #endif /* __XFS_INODE_FORK_H__ */ > > > diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h > > > index e3400c9c71cd..879aadff7692 100644 > > > --- a/fs/xfs/libxfs/xfs_log_format.h > > > +++ b/fs/xfs/libxfs/xfs_log_format.h > > > @@ -396,7 +396,7 @@ struct xfs_log_dinode { > > > xfs_fsize_t di_size; /* number of bytes in file */ > > > xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */ > > > xfs_extlen_t di_extsize; /* basic/minimum extent size for file */ > > > - xfs_extnum_t di_nextents; /* number of extents in data fork */ > > > + uint32_t di_nextents_lo; /* number of extents in data fork */ > > > xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/ > > > uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */ > > > int8_t di_aformat; /* format of attr fork's data */ > > > @@ -414,7 +414,8 @@ struct xfs_log_dinode { > > > xfs_lsn_t di_lsn; /* flush sequence */ > > > uint64_t di_flags2; /* more random flags */ > > > uint32_t di_cowextsize; /* basic cow extent size for file */ > > > - uint8_t di_pad2[12]; /* more padding for future expansion */ > > > + uint32_t di_nextents_hi; > > > + uint8_t di_pad2[8]; /* more padding for future expansion */ > > > > > > /* fields only written to during inode creation */ > > > xfs_ictimestamp_t di_crtime; /* time created */ > > > diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h > > > index 0a3041ad5bec..c68ff2178976 100644 > > > --- 
a/fs/xfs/libxfs/xfs_types.h > > > +++ b/fs/xfs/libxfs/xfs_types.h > > > @@ -12,7 +12,7 @@ typedef uint32_t xfs_agblock_t; /* blockno in alloc. group */ > > > typedef uint32_t xfs_agino_t; /* inode # within allocation grp */ > > > typedef uint32_t xfs_extlen_t; /* extent length in blocks */ > > > typedef uint32_t xfs_agnumber_t; /* allocation group number */ > > > -typedef int32_t xfs_extnum_t; /* # of extents in a file */ > > > +typedef uint64_t xfs_extnum_t; /* # of extents in a file */ > > > typedef int16_t xfs_aextnum_t; /* # extents in an attribute fork */ > > > typedef int64_t xfs_fsize_t; /* bytes in a file */ > > > typedef uint64_t xfs_ufsize_t; /* unsigned bytes in a file */ > > > @@ -59,7 +59,8 @@ typedef void * xfs_failaddr_t; > > > * Max values for extlen, extnum, aextnum. > > > */ > > > #define MAXEXTLEN ((xfs_extlen_t)0x001fffff) /* 21 bits */ > > > -#define MAXEXTNUM ((xfs_extnum_t)0x7fffffff) /* signed int */ > > > +#define MAXEXTNUM31BIT ((xfs_extnum_t)0x7fffffff) /* 31 bits */ > > > +#define MAXEXTNUM ((xfs_extnum_t)0x7fffffffffff) /* 47 bits */ > > > #define MAXDIREXTNUM ((xfs_extnum_t)0x7ffffff) /* 27 bits */ > > > #define MAXAEXTNUM ((xfs_aextnum_t)0x7fff) /* signed short */ > > > > > > diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c > > > index 6d483ab29e63..be41fd242ff2 100644 > > > --- a/fs/xfs/scrub/inode.c > > > +++ b/fs/xfs/scrub/inode.c > > > @@ -205,8 +205,8 @@ xchk_dinode( > > > struct xfs_mount *mp = sc->mp; > > > size_t fork_recs; > > > unsigned long long isize; > > > + xfs_extnum_t nextents; > > > uint64_t flags2; > > > - uint32_t nextents; > > > uint16_t flags; > > > uint16_t mode; > > > > > > @@ -354,7 +354,7 @@ xchk_dinode( > > > xchk_inode_extsize(sc, dip, ino, mode, flags); > > > > > > /* di_nextents */ > > > - nextents = be32_to_cpu(dip->di_nextents); > > > + nextents = xfs_dfork_nextents(&mp->m_sb, dip, XFS_DATA_FORK); > > > fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); > > > switch 
(dip->di_format) { > > > case XFS_DINODE_FMT_EXTENTS: > > > @@ -464,6 +464,7 @@ xchk_inode_xref_bmap( > > > struct xfs_scrub *sc, > > > struct xfs_dinode *dip) > > > { > > > + xfs_mount_t *mp = sc->mp; > > > > struct xfs_mount. Usage of the structure typedefs is deprecated and > > we're trying to get rid of them (slowly). > > Yes, I missed out on this one. I will fix this up. > > > > > --D > > > > > xfs_extnum_t nextents; > > > xfs_filblks_t count; > > > xfs_filblks_t acount; > > > @@ -477,14 +478,14 @@ xchk_inode_xref_bmap( > > > &nextents, &count); > > > if (!xchk_should_check_xref(sc, &error, NULL)) > > > return; > > > - if (nextents < be32_to_cpu(dip->di_nextents)) > > > + if (nextents < xfs_dfork_nextents(&mp->m_sb, dip, XFS_DATA_FORK)) > > > xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); > > > > > > error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, > > > &nextents, &acount); > > > if (!xchk_should_check_xref(sc, &error, NULL)) > > > return; > > > - if (nextents != be16_to_cpu(dip->di_anextents)) > > > + if (nextents != xfs_dfork_nextents(&mp->m_sb, dip, XFS_ATTR_FORK)) > > > xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); > > > > > > /* Check nblocks against the inode. 
*/ > > > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c > > > index 64f5f9a440ae..4418a66cf6d6 100644 > > > --- a/fs/xfs/xfs_inode.c > > > +++ b/fs/xfs/xfs_inode.c > > > @@ -3748,7 +3748,7 @@ xfs_iflush_int( > > > ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) { > > > xfs_alert_tag(mp, XFS_PTAG_IFLUSH, > > > "%s: detected corrupt incore inode %Lu, " > > > - "total extents = %d, nblocks = %Ld, ptr "PTR_FMT, > > > + "total extents = %llu, nblocks = %Ld, ptr "PTR_FMT, > > > __func__, ip->i_ino, > > > ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp), > > > ip->i_d.di_nblocks, ip); > > > @@ -3785,6 +3785,10 @@ xfs_iflush_int( > > > xfs_ifork_verify_local_attr(ip)) > > > goto flush_out; > > > > > > + if (!(ip->i_d.di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) > > > + && xfs_sb_version_has47bitext(&mp->m_sb)) > > > + ip->i_d.di_flags2 |= XFS_DIFLAG2_47BIT_NEXTENTS; > > > + > > > /* > > > * Copy the dirty parts of the inode into the on-disk inode. We always > > > * copy out the core of the inode, because if the inode is dirty at all > > > diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c > > > index ba47bf65b772..6f27ac7c8631 100644 > > > --- a/fs/xfs/xfs_inode_item.c > > > +++ b/fs/xfs/xfs_inode_item.c > > > @@ -326,7 +326,7 @@ xfs_inode_to_log_dinode( > > > to->di_size = from->di_size; > > > to->di_nblocks = from->di_nblocks; > > > to->di_extsize = from->di_extsize; > > > - to->di_nextents = xfs_ifork_nextents(&ip->i_df); > > > + to->di_nextents_lo = xfs_ifork_nextents(&ip->i_df) & 0xffffffffU; > > > to->di_anextents = xfs_ifork_nextents(ip->i_afp); > > > to->di_forkoff = from->di_forkoff; > > > to->di_aformat = xfs_ifork_format(ip->i_afp); > > > @@ -344,6 +344,9 @@ xfs_inode_to_log_dinode( > > > to->di_crtime.t_nsec = from->di_crtime.tv_nsec; > > > to->di_flags2 = from->di_flags2; > > > to->di_cowextsize = from->di_cowextsize; > > > + if (from->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) > > > + to->di_nextents_hi = > > > + xfs_ifork_nextents(&ip->i_df) >> 
32; > > > to->di_ino = ip->i_ino; > > > to->di_lsn = lsn; > > > memset(to->di_pad2, 0, sizeof(to->di_pad2)); > > > diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c > > > index 10ef5ddf5429..8d64b861fb66 100644 > > > --- a/fs/xfs/xfs_inode_item_recover.c > > > +++ b/fs/xfs/xfs_inode_item_recover.c > > > @@ -134,6 +134,7 @@ xlog_recover_inode_commit_pass2( > > > struct xfs_log_dinode *ldip; > > > uint isize; > > > int need_free = 0; > > > + xfs_extnum_t nextents; > > > > > > if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { > > > in_f = item->ri_buf[0].i_addr; > > > @@ -255,16 +256,23 @@ xlog_recover_inode_commit_pass2( > > > goto out_release; > > > } > > > } > > > - if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){ > > > + > > > + nextents = ldip->di_nextents_lo; > > > + if (xfs_sb_version_has_v3inode(&mp->m_sb) && > > > + ldip->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) > > > + nextents |= ((u64)(ldip->di_nextents_hi) << 32); > > > + > > > + nextents += ldip->di_anextents; > > > + > > > + if (unlikely(nextents > ldip->di_nblocks)) { > > > XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", > > > XFS_ERRLEVEL_LOW, mp, ldip, > > > sizeof(*ldip)); > > > xfs_alert(mp, > > > "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", " > > > - "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld", > > > + "dino bp "PTR_FMT", ino %Ld, total extents = %llu, nblocks = %Ld", > > > __func__, item, dip, bp, in_f->ilf_ino, > > > - ldip->di_nextents + ldip->di_anextents, > > > - ldip->di_nblocks); > > > + nextents, ldip->di_nblocks); > > > error = -EFSCORRUPTED; > > > goto out_release; > > > } > > > > -- > chandan > > >
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index f75b70ae7b1f..73e552678adc 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -53,9 +53,9 @@ xfs_bmap_compute_maxlevels( int whichfork, /* data or attr fork */ int dir_bmbt) /* Dir or non-dir data fork */ { + uint64_t maxleafents; /* max leaf entries possible */ int level; /* btree level */ uint maxblocks; /* max blocks at this level */ - uint maxleafents; /* max leaf entries possible */ int maxrootrecs; /* max records in root block */ int minleafrecs; /* min records in leaf block */ int minnoderecs; /* min records in node block */ @@ -477,7 +477,7 @@ xfs_bmap_check_leaf_extents( if (bp_release) xfs_trans_brelse(NULL, bp); error_norelse: - xfs_warn(mp, "%s: BAD after btree leaves for %d extents", + xfs_warn(mp, "%s: BAD after btree leaves for %llu extents", __func__, i); xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); @@ -918,7 +918,7 @@ xfs_bmap_local_to_extents( xfs_iext_first(ifp, &icur); xfs_iext_insert(ip, &icur, &rec, 0); - error = xfs_next_set(ip, whichfork, 1); + error = xfs_next_set(tp, ip, whichfork, 1); if (error) goto done; @@ -1610,7 +1610,7 @@ xfs_bmap_add_extent_delay_real( xfs_iext_prev(ifp, &bma->icur); xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT); - error = xfs_next_set(bma->ip, whichfork, -1); + error = xfs_next_set(bma->tp, bma->ip, whichfork, -1); if (error) goto done; @@ -1717,7 +1717,7 @@ xfs_bmap_add_extent_delay_real( PREV.br_state = new->br_state; xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); - error = xfs_next_set(bma->ip, whichfork, 1); + error = xfs_next_set(bma->tp, bma->ip, whichfork, 1); if (error) goto done; @@ -1786,7 +1786,7 @@ xfs_bmap_add_extent_delay_real( */ xfs_iext_update_extent(bma->ip, state, &bma->icur, new); - error = xfs_next_set(bma->ip, whichfork, 1); + error = xfs_next_set(bma->tp, bma->ip, whichfork, 1); if (error) goto done; @@ -1876,7 
+1876,7 @@ xfs_bmap_add_extent_delay_real( */ xfs_iext_update_extent(bma->ip, state, &bma->icur, new); - error = xfs_next_set(bma->ip, whichfork, 1); + error = xfs_next_set(bma->tp, bma->ip, whichfork, 1); if (error) goto done; @@ -1965,7 +1965,7 @@ xfs_bmap_add_extent_delay_real( xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state); xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state); - error = xfs_next_set(bma->ip, whichfork, 1); + error = xfs_next_set(bma->tp, bma->ip, whichfork, 1); if (error) goto done; @@ -2172,7 +2172,7 @@ xfs_bmap_add_extent_unwritten_real( xfs_iext_prev(ifp, icur); xfs_iext_update_extent(ip, state, icur, &LEFT); - error = xfs_next_set(ip, whichfork, -2); + error = xfs_next_set(tp, ip, whichfork, -2); if (error) goto done; @@ -2228,7 +2228,7 @@ xfs_bmap_add_extent_unwritten_real( xfs_iext_prev(ifp, icur); xfs_iext_update_extent(ip, state, icur, &LEFT); - error = xfs_next_set(ip, whichfork, -1); + error = xfs_next_set(tp, ip, whichfork, -1); if (error) goto done; @@ -2274,7 +2274,7 @@ xfs_bmap_add_extent_unwritten_real( xfs_iext_prev(ifp, icur); xfs_iext_update_extent(ip, state, icur, &PREV); - error = xfs_next_set(ip, whichfork, -1); + error = xfs_next_set(tp, ip, whichfork, -1); if (error) goto done; @@ -2385,7 +2385,7 @@ xfs_bmap_add_extent_unwritten_real( xfs_iext_update_extent(ip, state, icur, &PREV); xfs_iext_insert(ip, icur, new, state); - error = xfs_next_set(ip, whichfork, 1); + error = xfs_next_set(tp, ip, whichfork, 1); if (error) goto done; @@ -2464,7 +2464,7 @@ xfs_bmap_add_extent_unwritten_real( xfs_iext_next(ifp, icur); xfs_iext_insert(ip, icur, new, state); - error = xfs_next_set(ip, whichfork, 1); + error = xfs_next_set(tp, ip, whichfork, 1); if (error) goto done; @@ -2519,7 +2519,7 @@ xfs_bmap_add_extent_unwritten_real( xfs_iext_insert(ip, icur, &r[1], state); xfs_iext_insert(ip, icur, &r[0], state); - error = xfs_next_set(ip, whichfork, 2); + error = xfs_next_set(tp, ip, whichfork, 2); if (error) goto done; @@ -2838,7 
+2838,7 @@ xfs_bmap_add_extent_hole_real( xfs_iext_prev(ifp, icur); xfs_iext_update_extent(ip, state, icur, &left); - error = xfs_next_set(ip, whichfork, -1); + error = xfs_next_set(tp, ip, whichfork, -1); if (error) goto done; @@ -2940,7 +2940,7 @@ xfs_bmap_add_extent_hole_real( */ xfs_iext_insert(ip, icur, new, state); - error = xfs_next_set(ip, whichfork, 1); + error = xfs_next_set(tp, ip, whichfork, 1); if (error) goto done; @@ -5140,7 +5140,7 @@ xfs_bmap_del_extent_real( xfs_iext_remove(ip, icur, state); xfs_iext_prev(ifp, icur); - error = xfs_next_set(ip, whichfork, -1); + error = xfs_next_set(tp, ip, whichfork, -1); if (error) goto done; @@ -5252,7 +5252,7 @@ xfs_bmap_del_extent_real( } else flags |= xfs_ilog_fext(whichfork); - error = xfs_next_set(ip, whichfork, 1); + error = xfs_next_set(tp, ip, whichfork, 1); if (error) goto done; @@ -5722,7 +5722,7 @@ xfs_bmse_merge( * Update the on-disk extent count, the btree if necessary and log the * inode. */ - error = xfs_next_set(ip, whichfork, -1); + error = xfs_next_set(tp, ip, whichfork, -1); if (error) goto done; @@ -6113,7 +6113,7 @@ xfs_bmap_split_extent( xfs_iext_next(ifp, &icur); xfs_iext_insert(ip, &icur, &new, 0); - error = xfs_next_set(ip, whichfork, 1); + error = xfs_next_set(tp, ip, whichfork, 1); if (error) goto del_cursor; diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index b42a52bfa1e9..91bee33aa988 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -449,10 +449,12 @@ xfs_sb_has_compat_feature( #define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */ #define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */ #define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */ +#define XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR (1 << 3) /* 47bit data extents */ #define XFS_SB_FEAT_RO_COMPAT_ALL \ (XFS_SB_FEAT_RO_COMPAT_FINOBT | \ XFS_SB_FEAT_RO_COMPAT_RMAPBT | \ - XFS_SB_FEAT_RO_COMPAT_REFLINK) + XFS_SB_FEAT_RO_COMPAT_REFLINK | \ + 
XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR) #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL static inline bool xfs_sb_has_ro_compat_feature( @@ -563,6 +565,18 @@ static inline bool xfs_sb_version_hasreflink(struct xfs_sb *sbp) (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK); } +static inline bool xfs_sb_version_has47bitext(struct xfs_sb *sbp) +{ + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && + (sbp->sb_features_ro_compat & + XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR); +} + +static inline void xfs_sb_version_add47bitext(struct xfs_sb *sbp) +{ + sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR; +} + /* * end of superblock version macros */ @@ -873,7 +887,7 @@ typedef struct xfs_dinode { __be64 di_size; /* number of bytes in file */ __be64 di_nblocks; /* # of direct & btree blocks used */ __be32 di_extsize; /* basic/minimum extent size for file */ - __be32 di_nextents; /* number of extents in data fork */ + __be32 di_nextents_lo; /* number of extents in data fork */ __be16 di_anextents; /* number of extents in attribute fork*/ __u8 di_forkoff; /* attr fork offs, <<3 for 64b align */ __s8 di_aformat; /* format of attr fork's data */ @@ -891,7 +905,8 @@ typedef struct xfs_dinode { __be64 di_lsn; /* flush sequence */ __be64 di_flags2; /* more random flags */ __be32 di_cowextsize; /* basic cow extent size for file */ - __u8 di_pad2[12]; /* more padding for future expansion */ + __be32 di_nextents_hi; + __u8 di_pad2[8]; /* more padding for future expansion */ /* fields only written to during inode creation */ xfs_timestamp_t di_crtime; /* time created */ @@ -992,10 +1007,6 @@ enum xfs_dinode_fmt { ((w) == XFS_DATA_FORK ? \ (dip)->di_format : \ (dip)->di_aformat) -#define XFS_DFORK_NEXTENTS(dip,w) \ - ((w) == XFS_DATA_FORK ? 
\ - be32_to_cpu((dip)->di_nextents) : \ - be16_to_cpu((dip)->di_anextents)) /* * For block and character special files the 32bit dev_t is stored at the @@ -1061,12 +1072,15 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) #define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */ #define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */ #define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */ +#define XFS_DIFLAG2_47BIT_NEXTENTS_BIT 3 /* Uses di_nextents_hi field */ #define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT) #define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT) #define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT) +#define XFS_DIFLAG2_47BIT_NEXTENTS (1 << XFS_DIFLAG2_47BIT_NEXTENTS_BIT) #define XFS_DIFLAG2_ANY \ - (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE) + (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \ + XFS_DIFLAG2_47BIT_NEXTENTS) /* * Inode number format: diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 6f84ea85fdd8..8b89fe080f70 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -307,7 +307,8 @@ xfs_inode_to_disk( to->di_size = cpu_to_be64(from->di_size); to->di_nblocks = cpu_to_be64(from->di_nblocks); to->di_extsize = cpu_to_be32(from->di_extsize); - to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df)); + to->di_nextents_lo = cpu_to_be32(xfs_ifork_nextents(&ip->i_df) & + 0xffffffffU); to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp)); to->di_forkoff = from->di_forkoff; to->di_aformat = xfs_ifork_format(ip->i_afp); @@ -322,6 +323,10 @@ xfs_inode_to_disk( to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.tv_nsec); to->di_flags2 = cpu_to_be64(from->di_flags2); to->di_cowextsize = cpu_to_be32(from->di_cowextsize); + if (from->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) + to->di_nextents_hi + = cpu_to_be32(xfs_ifork_nextents(&ip->i_df) + >> 32); to->di_ino = 
cpu_to_be64(ip->i_ino); to->di_lsn = cpu_to_be64(lsn); memset(to->di_pad2, 0, sizeof(to->di_pad2)); @@ -360,7 +365,7 @@ xfs_log_dinode_to_disk( to->di_size = cpu_to_be64(from->di_size); to->di_nblocks = cpu_to_be64(from->di_nblocks); to->di_extsize = cpu_to_be32(from->di_extsize); - to->di_nextents = cpu_to_be32(from->di_nextents); + to->di_nextents_lo = cpu_to_be32(from->di_nextents_lo); to->di_anextents = cpu_to_be16(from->di_anextents); to->di_forkoff = from->di_forkoff; to->di_aformat = from->di_aformat; @@ -375,6 +380,9 @@ xfs_log_dinode_to_disk( to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); to->di_flags2 = cpu_to_be64(from->di_flags2); to->di_cowextsize = cpu_to_be32(from->di_cowextsize); + if (from->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) + to->di_nextents_hi = + cpu_to_be32(from->di_nextents_hi); to->di_ino = cpu_to_be64(from->di_ino); to->di_lsn = cpu_to_be64(from->di_lsn); memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); @@ -391,7 +399,9 @@ xfs_dinode_verify_fork( struct xfs_mount *mp, int whichfork) { - uint32_t di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork); + xfs_extnum_t di_nextents; + + di_nextents = xfs_dfork_nextents(&mp->m_sb, dip, whichfork); switch (XFS_DFORK_FORMAT(dip, whichfork)) { case XFS_DINODE_FMT_LOCAL: @@ -462,6 +472,8 @@ xfs_dinode_verify( uint16_t flags; uint64_t flags2; uint64_t di_size; + xfs_extnum_t nextents; + int64_t nblocks; if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) return __this_address; @@ -492,10 +504,12 @@ xfs_dinode_verify( if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) return __this_address; + nextents = xfs_dfork_nextents(&mp->m_sb, dip, XFS_DATA_FORK); + nextents += xfs_dfork_nextents(&mp->m_sb, dip, XFS_ATTR_FORK); + nblocks = be64_to_cpu(dip->di_nblocks); + /* Fork checks carried over from xfs_iformat_fork */ - if (mode && - be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) > - be64_to_cpu(dip->di_nblocks)) + if (mode && nextents > nblocks) return __this_address; 
if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) @@ -716,3 +730,23 @@ xfs_inode_validate_cowextsize( return NULL; } + +xfs_extnum_t +xfs_dfork_nextents( + struct xfs_sb *sbp, + struct xfs_dinode *dip, + int whichfork) +{ + xfs_extnum_t nextents; + + if (whichfork == XFS_DATA_FORK) { + nextents = be32_to_cpu(dip->di_nextents_lo); + if (xfs_sb_version_has_v3inode(sbp) + && (dip->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS)) + nextents |= (u64)(be32_to_cpu(dip->di_nextents_hi)) + << 32; + return nextents; + } else { + return be16_to_cpu(dip->di_anextents); + } +} diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index 865ac493c72a..4583db53b933 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -65,5 +65,7 @@ xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp, xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp, uint32_t cowextsize, uint16_t mode, uint16_t flags, uint64_t flags2); +xfs_extnum_t xfs_dfork_nextents(struct xfs_sb *sbp, struct xfs_dinode *dip, + int whichfork); #endif /* __XFS_INODE_BUF_H__ */ diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 3bf5a2c391bd..ec682e2d5bcb 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -10,6 +10,7 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" +#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_trans.h" @@ -103,21 +104,22 @@ xfs_iformat_extents( int whichfork) { struct xfs_mount *mp = ip->i_mount; + struct xfs_sb *sb = &mp->m_sb; struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + xfs_extnum_t nex = xfs_dfork_nextents(sb, dip, whichfork); int state = xfs_bmap_fork_to_state(whichfork); - int nex = XFS_DFORK_NEXTENTS(dip, whichfork); int size = nex * sizeof(xfs_bmbt_rec_t); struct xfs_iext_cursor icur; struct xfs_bmbt_rec *dp; struct xfs_bmbt_irec new; - int i; + xfs_extnum_t i; /* * If the number of extents 
is unreasonable, then something is wrong and * we just bail out rather than crash in kmem_alloc() or memcpy() below. */ if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) { - xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", + xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %llu).", (unsigned long long) ip->i_ino, nex); xfs_inode_verifier_error(ip, -EFSCORRUPTED, "xfs_iformat_extents(1)", dip, sizeof(*dip), @@ -233,7 +235,11 @@ xfs_iformat_data_fork( * depend on it. */ ip->i_df.if_format = dip->di_format; - ip->i_df.if_nextents = be32_to_cpu(dip->di_nextents); + ip->i_df.if_nextents = be32_to_cpu(dip->di_nextents_lo); + if (ip->i_d.di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) + ip->i_df.if_nextents |= + ((u64)(be32_to_cpu(dip->di_nextents_hi)) << 32); + switch (inode->i_mode & S_IFMT) { case S_IFIFO: @@ -729,31 +735,73 @@ xfs_ifork_verify_local_attr( return 0; } +static int +xfs_next_set_data( + struct xfs_trans *tp, + struct xfs_inode *ip, + struct xfs_ifork *ifp, + int delta) +{ + struct xfs_mount *mp = ip->i_mount; + xfs_extnum_t nr_exts; + + nr_exts = ifp->if_nextents + delta; + + if ((delta > 0 && nr_exts > MAXEXTNUM) + || (delta < 0 && nr_exts > ifp->if_nextents)) + return -EOVERFLOW; + + if (ifp->if_nextents <= MAXEXTNUM31BIT && + nr_exts > MAXEXTNUM31BIT && + !(ip->i_d.di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) && + xfs_sb_version_has_v3inode(&mp->m_sb)) { + if (!xfs_sb_version_has47bitext(&mp->m_sb)) { + bool log_sb = false; + + spin_lock(&mp->m_sb_lock); + if (!xfs_sb_version_has47bitext(&mp->m_sb)) { + xfs_sb_version_add47bitext(&mp->m_sb); + log_sb = true; + } + spin_unlock(&mp->m_sb_lock); + + if (log_sb) + xfs_log_sb(tp); + } + + ip->i_d.di_flags2 |= XFS_DIFLAG2_47BIT_NEXTENTS; + } + + ifp->if_nextents = nr_exts; + + return 0; +} + int xfs_next_set( + struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, int delta) { struct xfs_ifork *ifp; int64_t nr_exts; - int64_t max_exts; + int error = 0; ifp = XFS_IFORK_PTR(ip, 
whichfork); - if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) - max_exts = MAXEXTNUM; - else if (whichfork == XFS_ATTR_FORK) - max_exts = MAXAEXTNUM; - else - ASSERT(0); - - nr_exts = ifp->if_nextents + delta; - if ((delta > 0 && nr_exts > max_exts) - || (delta < 0 && nr_exts < 0)) - return -EOVERFLOW; + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) { + error = xfs_next_set_data(tp, ip, ifp, delta); + } else if (whichfork == XFS_ATTR_FORK) { + nr_exts = ifp->if_nextents + delta; + if ((delta > 0 && nr_exts > MAXAEXTNUM) + || (delta < 0 && nr_exts < 0)) + return -EOVERFLOW; - ifp->if_nextents = nr_exts; + ifp->if_nextents = nr_exts; + } else { + ASSERT(0); + } - return 0; + return error; } diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index a84ae42ace79..c74fa6371cc8 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -173,5 +173,6 @@ extern void xfs_ifork_init_cow(struct xfs_inode *ip); int xfs_ifork_verify_local_data(struct xfs_inode *ip); int xfs_ifork_verify_local_attr(struct xfs_inode *ip); -int xfs_next_set(struct xfs_inode *ip, int whichfork, int delta); +int xfs_next_set(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, + int delta); #endif /* __XFS_INODE_FORK_H__ */ diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index e3400c9c71cd..879aadff7692 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -396,7 +396,7 @@ struct xfs_log_dinode { xfs_fsize_t di_size; /* number of bytes in file */ xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */ xfs_extlen_t di_extsize; /* basic/minimum extent size for file */ - xfs_extnum_t di_nextents; /* number of extents in data fork */ + uint32_t di_nextents_lo; /* number of extents in data fork */ xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/ uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */ int8_t di_aformat; /* format of 
attr fork's data */ @@ -414,7 +414,8 @@ struct xfs_log_dinode { xfs_lsn_t di_lsn; /* flush sequence */ uint64_t di_flags2; /* more random flags */ uint32_t di_cowextsize; /* basic cow extent size for file */ - uint8_t di_pad2[12]; /* more padding for future expansion */ + uint32_t di_nextents_hi; + uint8_t di_pad2[8]; /* more padding for future expansion */ /* fields only written to during inode creation */ xfs_ictimestamp_t di_crtime; /* time created */ diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 0a3041ad5bec..c68ff2178976 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -12,7 +12,7 @@ typedef uint32_t xfs_agblock_t; /* blockno in alloc. group */ typedef uint32_t xfs_agino_t; /* inode # within allocation grp */ typedef uint32_t xfs_extlen_t; /* extent length in blocks */ typedef uint32_t xfs_agnumber_t; /* allocation group number */ -typedef int32_t xfs_extnum_t; /* # of extents in a file */ +typedef uint64_t xfs_extnum_t; /* # of extents in a file */ typedef int16_t xfs_aextnum_t; /* # extents in an attribute fork */ typedef int64_t xfs_fsize_t; /* bytes in a file */ typedef uint64_t xfs_ufsize_t; /* unsigned bytes in a file */ @@ -59,7 +59,8 @@ typedef void * xfs_failaddr_t; * Max values for extlen, extnum, aextnum. 
*/ #define MAXEXTLEN ((xfs_extlen_t)0x001fffff) /* 21 bits */ -#define MAXEXTNUM ((xfs_extnum_t)0x7fffffff) /* signed int */ +#define MAXEXTNUM31BIT ((xfs_extnum_t)0x7fffffff) /* 31 bits */ +#define MAXEXTNUM ((xfs_extnum_t)0x7fffffffffff) /* 47 bits */ #define MAXDIREXTNUM ((xfs_extnum_t)0x7ffffff) /* 27 bits */ #define MAXAEXTNUM ((xfs_aextnum_t)0x7fff) /* signed short */ diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 6d483ab29e63..be41fd242ff2 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -205,8 +205,8 @@ xchk_dinode( struct xfs_mount *mp = sc->mp; size_t fork_recs; unsigned long long isize; + xfs_extnum_t nextents; uint64_t flags2; - uint32_t nextents; uint16_t flags; uint16_t mode; @@ -354,7 +354,7 @@ xchk_dinode( xchk_inode_extsize(sc, dip, ino, mode, flags); /* di_nextents */ - nextents = be32_to_cpu(dip->di_nextents); + nextents = xfs_dfork_nextents(&mp->m_sb, dip, XFS_DATA_FORK); fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); switch (dip->di_format) { case XFS_DINODE_FMT_EXTENTS: @@ -464,6 +464,7 @@ xchk_inode_xref_bmap( struct xfs_scrub *sc, struct xfs_dinode *dip) { + xfs_mount_t *mp = sc->mp; xfs_extnum_t nextents; xfs_filblks_t count; xfs_filblks_t acount; @@ -477,14 +478,14 @@ xchk_inode_xref_bmap( &nextents, &count); if (!xchk_should_check_xref(sc, &error, NULL)) return; - if (nextents < be32_to_cpu(dip->di_nextents)) + if (nextents < xfs_dfork_nextents(&mp->m_sb, dip, XFS_DATA_FORK)) xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, &nextents, &acount); if (!xchk_should_check_xref(sc, &error, NULL)) return; - if (nextents != be16_to_cpu(dip->di_anextents)) + if (nextents != xfs_dfork_nextents(&mp->m_sb, dip, XFS_ATTR_FORK)) xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); /* Check nblocks against the inode. 
*/ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 64f5f9a440ae..4418a66cf6d6 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3748,7 +3748,7 @@ xfs_iflush_int( ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, "%s: detected corrupt incore inode %Lu, " - "total extents = %d, nblocks = %Ld, ptr "PTR_FMT, + "total extents = %llu, nblocks = %Ld, ptr "PTR_FMT, __func__, ip->i_ino, ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp), ip->i_d.di_nblocks, ip); @@ -3785,6 +3785,10 @@ xfs_iflush_int( xfs_ifork_verify_local_attr(ip)) goto flush_out; + if (!(ip->i_d.di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) + && xfs_sb_version_has47bitext(&mp->m_sb)) + ip->i_d.di_flags2 |= XFS_DIFLAG2_47BIT_NEXTENTS; + /* * Copy the dirty parts of the inode into the on-disk inode. We always * copy out the core of the inode, because if the inode is dirty at all diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index ba47bf65b772..6f27ac7c8631 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -326,7 +326,7 @@ xfs_inode_to_log_dinode( to->di_size = from->di_size; to->di_nblocks = from->di_nblocks; to->di_extsize = from->di_extsize; - to->di_nextents = xfs_ifork_nextents(&ip->i_df); + to->di_nextents_lo = xfs_ifork_nextents(&ip->i_df) & 0xffffffffU; to->di_anextents = xfs_ifork_nextents(ip->i_afp); to->di_forkoff = from->di_forkoff; to->di_aformat = xfs_ifork_format(ip->i_afp); @@ -344,6 +344,9 @@ xfs_inode_to_log_dinode( to->di_crtime.t_nsec = from->di_crtime.tv_nsec; to->di_flags2 = from->di_flags2; to->di_cowextsize = from->di_cowextsize; + if (from->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) + to->di_nextents_hi = + xfs_ifork_nextents(&ip->i_df) >> 32; to->di_ino = ip->i_ino; to->di_lsn = lsn; memset(to->di_pad2, 0, sizeof(to->di_pad2)); diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index 10ef5ddf5429..8d64b861fb66 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ 
b/fs/xfs/xfs_inode_item_recover.c @@ -134,6 +134,7 @@ xlog_recover_inode_commit_pass2( struct xfs_log_dinode *ldip; uint isize; int need_free = 0; + xfs_extnum_t nextents; if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { in_f = item->ri_buf[0].i_addr; @@ -255,16 +256,23 @@ xlog_recover_inode_commit_pass2( goto out_release; } } - if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){ + + nextents = ldip->di_nextents_lo; + if (xfs_sb_version_has_v3inode(&mp->m_sb) && + ldip->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS) + nextents |= ((u64)(ldip->di_nextents_hi) << 32); + + nextents += ldip->di_anextents; + + if (unlikely(nextents > ldip->di_nblocks)) { XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", XFS_ERRLEVEL_LOW, mp, ldip, sizeof(*ldip)); xfs_alert(mp, "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", " - "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld", + "dino bp "PTR_FMT", ino %Ld, total extents = %llu, nblocks = %Ld", __func__, item, dip, bp, in_f->ilf_ino, - ldip->di_nextents + ldip->di_anextents, - ldip->di_nblocks); + nextents, ldip->di_nblocks); error = -EFSCORRUPTED; goto out_release; }
This commit extends the per-inode data extent counter to 47 bits. A width of 47 bits was chosen because: Maximum file size = 2^63. Maximum extent count when using 64k block size = 2^63 / 2^16 = 2^47. The following changes are made to accomplish this: 1. A new ro-compat superblock flag to prevent older kernels from mounting the filesystem in read-write mode. This flag is set for the first time when an inode would end up having more than 2^31 - 1 extents. 2. Carve out a new 32-bit field from xfs_dinode->di_pad2[]. This field holds the most significant 15 bits of the data extent counter. 3. A new inode->di_flags2 flag to indicate that the newly added field contains valid data. This flag is set when either of the following two conditions is met: - When the inode is about to have more than 2^31 - 1 extents. - When flushing the incore inode (see xfs_iflush_int()), if the superblock ro-compat flag is already set. Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> --- fs/xfs/libxfs/xfs_bmap.c | 40 ++++++++-------- fs/xfs/libxfs/xfs_format.h | 30 ++++++++---- fs/xfs/libxfs/xfs_inode_buf.c | 46 +++++++++++++++--- fs/xfs/libxfs/xfs_inode_buf.h | 2 + fs/xfs/libxfs/xfs_inode_fork.c | 84 ++++++++++++++++++++++++++------- fs/xfs/libxfs/xfs_inode_fork.h | 3 +- fs/xfs/libxfs/xfs_log_format.h | 5 +- fs/xfs/libxfs/xfs_types.h | 5 +- fs/xfs/scrub/inode.c | 9 ++-- fs/xfs/xfs_inode.c | 6 ++- fs/xfs/xfs_inode_item.c | 5 +- fs/xfs/xfs_inode_item_recover.c | 16 +++++-- 12 files changed, 184 insertions(+), 67 deletions(-)