diff mbox series

[7/7] xfs: Extend attr extent counter to 32 bits

Message ID 20200606082745.15174-8-chandanrlinux@gmail.com (mailing list archive)
State New, archived
Headers show
Series xfs: Extend per-inode extent counters. | expand

Commit Message

Chandan Babu R June 6, 2020, 8:27 a.m. UTC
This commit extends the per-inode attr extent counter to 32 bits.

The following changes are made to accomplish this,
1. A new ro-compat superblock flag to prevent older kernels from
   mounting the filesystem in read-write mode. This flag is set for the
   first time when an inode would end up having more than 2^15 extents.
3. Carve out a new 16-bit field from xfs_dinode->di_pad2[]. This field
   holds the most significant 16 bits of the attr extent counter.
2. A new inode->di_flags2 flag to indicate that the newly added field
   contains valid data. This flag is set when one of the following two
   conditions are met,
   - When the inode is about to have more than 2^15 extents.
   - When flushing the incore inode (See xfs_iflush_int()), if
     the superblock ro-compat flag is already set.

Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com>
---
 fs/xfs/libxfs/xfs_format.h      | 25 ++++++++++---
 fs/xfs/libxfs/xfs_inode_buf.c   | 23 +++++++++---
 fs/xfs/libxfs/xfs_inode_fork.c  | 62 ++++++++++++++++++++++++++-------
 fs/xfs/libxfs/xfs_log_format.h  |  5 +--
 fs/xfs/libxfs/xfs_types.h       |  5 +--
 fs/xfs/scrub/inode.c            |  5 +--
 fs/xfs/xfs_inode.c              |  4 +++
 fs/xfs/xfs_inode_item.c         |  5 ++-
 fs/xfs/xfs_inode_item_recover.c |  8 ++++-
 9 files changed, 113 insertions(+), 29 deletions(-)

Comments

Darrick J. Wong June 8, 2020, 5:21 p.m. UTC | #1
On Sat, Jun 06, 2020 at 01:57:45PM +0530, Chandan Babu R wrote:
> This commit extends the per-inode attr extent counter to 32 bits.
> 
> The following changes are made to accomplish this,
> 1. A new ro-compat superblock flag to prevent older kernels from
>    mounting the filesystem in read-write mode. This flag is set for the
>    first time when an inode would end up having more than 2^15 extents.
> 3. Carve out a new 16-bit field from xfs_dinode->di_pad2[]. This field
>    holds the most significant 16 bits of the attr extent counter.

How difficult is it to end up with an attr fork mapping more than 2^32
blocks?  Supposing I have a file with nlinks==2^32-1, each mapped to a
255-byte name and some number of other xattrs?

> 2. A new inode->di_flags2 flag to indicate that the newly added field
>    contains valid data. This flag is set when one of the following two
>    conditions are met,
>    - When the inode is about to have more than 2^15 extents.
>    - When flushing the incore inode (See xfs_iflush_int()), if
>      the superblock ro-compat flag is already set.
> 
> Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com>
> ---
>  fs/xfs/libxfs/xfs_format.h      | 25 ++++++++++---
>  fs/xfs/libxfs/xfs_inode_buf.c   | 23 +++++++++---
>  fs/xfs/libxfs/xfs_inode_fork.c  | 62 ++++++++++++++++++++++++++-------
>  fs/xfs/libxfs/xfs_log_format.h  |  5 +--
>  fs/xfs/libxfs/xfs_types.h       |  5 +--
>  fs/xfs/scrub/inode.c            |  5 +--
>  fs/xfs/xfs_inode.c              |  4 +++
>  fs/xfs/xfs_inode_item.c         |  5 ++-
>  fs/xfs/xfs_inode_item_recover.c |  8 ++++-
>  9 files changed, 113 insertions(+), 29 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
> index 91bee33aa988..2e37d887fd35 100644
> --- a/fs/xfs/libxfs/xfs_format.h
> +++ b/fs/xfs/libxfs/xfs_format.h
> @@ -450,11 +450,13 @@ xfs_sb_has_compat_feature(
>  #define XFS_SB_FEAT_RO_COMPAT_RMAPBT   (1 << 1)		/* reverse map btree */
>  #define XFS_SB_FEAT_RO_COMPAT_REFLINK  (1 << 2)		/* reflinked files */
>  #define XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR (1 << 3)	/* 47bit data extents */
> +#define XFS_SB_FEAT_RO_COMPAT_32BIT_AEXT_CNTR (1 << 4)	/* 32bit attr extents */

Can we bundle both of these changes in a single feature flag?  I would
like to keep our feature testing matrix as small as we can.

/* 64-bit data fork extent counts and 32-bit attr fork extent counts */
#define XFS_SB_FEAT_RO_COMPAT_BIG_FORK	(1 << 4)

>  #define XFS_SB_FEAT_RO_COMPAT_ALL \
>  		(XFS_SB_FEAT_RO_COMPAT_FINOBT | \
>  		 XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
>  		 XFS_SB_FEAT_RO_COMPAT_REFLINK | \
> -		 XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR)
> +		 XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR | \
> +		 XFS_SB_FEAT_RO_COMPAT_32BIT_AEXT_CNTR)
>  #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN	~XFS_SB_FEAT_RO_COMPAT_ALL
>  static inline bool
>  xfs_sb_has_ro_compat_feature(
> @@ -577,6 +579,18 @@ static inline void xfs_sb_version_add47bitext(struct xfs_sb *sbp)
>  	sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR;
>  }
>  
> +static inline bool xfs_sb_version_has32bitaext(struct xfs_sb *sbp)
> +{
> +	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
> +		(sbp->sb_features_ro_compat &
> +			XFS_SB_FEAT_RO_COMPAT_32BIT_AEXT_CNTR);
> +}
> +
> +static inline void xfs_sb_version_add32bitaext(struct xfs_sb *sbp)
> +{
> +	sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_32BIT_AEXT_CNTR;
> +}
> +
>  /*
>   * end of superblock version macros
>   */
> @@ -888,7 +902,7 @@ typedef struct xfs_dinode {
>  	__be64		di_nblocks;	/* # of direct & btree blocks used */
>  	__be32		di_extsize;	/* basic/minimum extent size for file */
>  	__be32		di_nextents_lo;	/* number of extents in data fork */
> -	__be16		di_anextents;	/* number of extents in attribute fork*/
> +	__be16		di_anextents_lo;/* lower part of xattr extent count */
>  	__u8		di_forkoff;	/* attr fork offs, <<3 for 64b align */
>  	__s8		di_aformat;	/* format of attr fork's data */
>  	__be32		di_dmevmask;	/* DMIG event mask */
> @@ -906,7 +920,8 @@ typedef struct xfs_dinode {
>  	__be64		di_flags2;	/* more random flags */
>  	__be32		di_cowextsize;	/* basic cow extent size for file */
>  	__be32		di_nextents_hi;
> -	__u8		di_pad2[8];	/* more padding for future expansion */
> +	__be16		di_anextents_hi;/* higher part of xattr extent count */
> +	__u8		di_pad2[6];	/* more padding for future expansion */
>  
>  	/* fields only written to during inode creation */
>  	xfs_timestamp_t	di_crtime;	/* time created */
> @@ -1073,14 +1088,16 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
>  #define XFS_DIFLAG2_REFLINK_BIT	1	/* file's blocks may be shared */
>  #define XFS_DIFLAG2_COWEXTSIZE_BIT   2  /* copy on write extent size hint */
>  #define XFS_DIFLAG2_47BIT_NEXTENTS_BIT 3 /* Uses di_nextents_hi field */
> +#define XFS_DIFLAG2_32BIT_ANEXTENTS_BIT 4 /* Uses di_anextents_hi field  */

Same thing here.

>  #define XFS_DIFLAG2_DAX		(1 << XFS_DIFLAG2_DAX_BIT)
>  #define XFS_DIFLAG2_REFLINK     (1 << XFS_DIFLAG2_REFLINK_BIT)
>  #define XFS_DIFLAG2_COWEXTSIZE  (1 << XFS_DIFLAG2_COWEXTSIZE_BIT)
>  #define XFS_DIFLAG2_47BIT_NEXTENTS (1 << XFS_DIFLAG2_47BIT_NEXTENTS_BIT)
> +#define XFS_DIFLAG2_32BIT_ANEXTENTS (1 << XFS_DIFLAG2_32BIT_ANEXTENTS_BIT)
>  
>  #define XFS_DIFLAG2_ANY \
>  	(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
> -	 XFS_DIFLAG2_47BIT_NEXTENTS)
> +	 XFS_DIFLAG2_47BIT_NEXTENTS | XFS_DIFLAG2_32BIT_ANEXTENTS)
>  
>  /*
>   * Inode number format:
> diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
> index 8b89fe080f70..285cbce0cd10 100644
> --- a/fs/xfs/libxfs/xfs_inode_buf.c
> +++ b/fs/xfs/libxfs/xfs_inode_buf.c
> @@ -309,7 +309,8 @@ xfs_inode_to_disk(
>  	to->di_extsize = cpu_to_be32(from->di_extsize);
>  	to->di_nextents_lo = cpu_to_be32(xfs_ifork_nextents(&ip->i_df) &
>  					0xffffffffU);
> -	to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp));
> +	to->di_anextents_lo = cpu_to_be16(xfs_ifork_nextents(ip->i_afp) &
> +					0xffffU);
>  	to->di_forkoff = from->di_forkoff;
>  	to->di_aformat = xfs_ifork_format(ip->i_afp);
>  	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
> @@ -327,6 +328,10 @@ xfs_inode_to_disk(
>  			to->di_nextents_hi
>  				= cpu_to_be32(xfs_ifork_nextents(&ip->i_df)
>  					>> 32);
> +		if (from->di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS)
> +			to->di_anextents_hi
> +				= cpu_to_be16(xfs_ifork_nextents(ip->i_afp)
> +					>> 16);
>  		to->di_ino = cpu_to_be64(ip->i_ino);
>  		to->di_lsn = cpu_to_be64(lsn);
>  		memset(to->di_pad2, 0, sizeof(to->di_pad2));
> @@ -366,7 +371,7 @@ xfs_log_dinode_to_disk(
>  	to->di_nblocks = cpu_to_be64(from->di_nblocks);
>  	to->di_extsize = cpu_to_be32(from->di_extsize);
>  	to->di_nextents_lo = cpu_to_be32(from->di_nextents_lo);
> -	to->di_anextents = cpu_to_be16(from->di_anextents);
> +	to->di_anextents_lo = cpu_to_be16(from->di_anextents_lo);
>  	to->di_forkoff = from->di_forkoff;
>  	to->di_aformat = from->di_aformat;
>  	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
> @@ -383,6 +388,9 @@ xfs_log_dinode_to_disk(
>  		if (from->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS)
>  			to->di_nextents_hi =
>  				cpu_to_be32(from->di_nextents_hi);
> +		if (from->di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS)
> +			to->di_anextents_hi =
> +				cpu_to_be16(from->di_anextents_hi);
>  		to->di_ino = cpu_to_be64(from->di_ino);
>  		to->di_lsn = cpu_to_be64(from->di_lsn);
>  		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
> @@ -566,7 +574,7 @@ xfs_dinode_verify(
>  		default:
>  			return __this_address;
>  		}
> -		if (dip->di_anextents)
> +		if (xfs_dfork_nextents(&mp->m_sb, dip, XFS_ATTR_FORK))
>  			return __this_address;
>  	}
>  
> @@ -745,8 +753,13 @@ xfs_dfork_nextents(
>  			&& (dip->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS))
>  			nextents |= (u64)(be32_to_cpu(dip->di_nextents_hi))
>  				<< 32;
> -		return nextents;
>  	} else {
> -		return be16_to_cpu(dip->di_anextents);
> +		nextents = be16_to_cpu(dip->di_anextents_lo);
> +		if (xfs_sb_version_has_v3inode(sbp)
> +			&& (dip->di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS))
> +			nextents |= (u32)(be16_to_cpu(dip->di_anextents_hi))

<same if test logic vs. if body statement indentation complaint>

> +				<< 16;
>  	}
> +
> +	return nextents;
>  }
> diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
> index ec682e2d5bcb..169e16947ece 100644
> --- a/fs/xfs/libxfs/xfs_inode_fork.c
> +++ b/fs/xfs/libxfs/xfs_inode_fork.c
> @@ -301,7 +301,10 @@ xfs_iformat_attr_fork(
>  	ip->i_afp->if_format = dip->di_aformat;
>  	if (unlikely(ip->i_afp->if_format == 0)) /* pre IRIX 6.2 file system */
>  		ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
> -	ip->i_afp->if_nextents = be16_to_cpu(dip->di_anextents);
> +	ip->i_afp->if_nextents = be16_to_cpu(dip->di_anextents_lo);
> +	if (ip->i_d.di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS)
> +		ip->i_afp->if_nextents |=
> +			(u32)(be16_to_cpu(dip->di_anextents_hi)) << 16;
>  
>  	switch (ip->i_afp->if_format) {
>  	case XFS_DINODE_FMT_LOCAL:
> @@ -777,6 +780,48 @@ xfs_next_set_data(
>  	return 0;
>  }
>  
> +static int
> +xfs_next_set_attr(
> +	struct xfs_trans	*tp,
> +	struct xfs_inode	*ip,
> +	struct xfs_ifork	*ifp,
> +	int			delta)
> +{
> +	struct xfs_mount	*mp = ip->i_mount;
> +	xfs_aextnum_t		nr_exts;
> +
> +	nr_exts = ifp->if_nextents + delta;
> +
> +	if ((delta > 0 && nr_exts < ifp->if_nextents) ||
> +		(delta < 0 && nr_exts > ifp->if_nextents))
> +		return -EOVERFLOW;
> +
> +	if (ifp->if_nextents <= MAXAEXTNUM15BIT &&
> +		nr_exts > MAXAEXTNUM15BIT &&
> +		!(ip->i_d.di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS) &&
> +		xfs_sb_version_has_v3inode(&mp->m_sb)) {
> +		if (!xfs_sb_version_has32bitaext(&mp->m_sb)) {

Indentation complaint^2

> +			bool log_sb = false;
> +
> +			spin_lock(&mp->m_sb_lock);
> +			if (!xfs_sb_version_has32bitaext(&mp->m_sb)) {
> +				xfs_sb_version_add32bitaext(&mp->m_sb);
> +				log_sb = true;
> +			}
> +			spin_unlock(&mp->m_sb_lock);
> +
> +			if (log_sb)
> +				xfs_log_sb(tp);
> +		}
> +
> +		ip->i_d.di_flags2 |= XFS_DIFLAG2_32BIT_ANEXTENTS;
> +	}
> +
> +	ifp->if_nextents = nr_exts;
> +
> +	return 0;
> +}
> +
>  int
>  xfs_next_set(
>  	struct xfs_trans	*tp,
> @@ -785,23 +830,16 @@ xfs_next_set(
>  	int			delta)
>  {
>  	struct xfs_ifork	*ifp;
> -	int64_t			nr_exts;
>  	int			error = 0;
>  
>  	ifp = XFS_IFORK_PTR(ip, whichfork);
>  
> -	if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
> +	if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK)
>  		error = xfs_next_set_data(tp, ip, ifp, delta);
> -	} else if (whichfork == XFS_ATTR_FORK) {
> -		nr_exts = ifp->if_nextents + delta;
> -		if ((delta > 0 && nr_exts > MAXAEXTNUM)
> -			|| (delta < 0 && nr_exts < 0))
> -			return -EOVERFLOW;
> -
> -		ifp->if_nextents = nr_exts;
> -	} else {
> +	else if (whichfork == XFS_ATTR_FORK)
> +		error = xfs_next_set_attr(tp, ip, ifp, delta);
> +	else
>  		ASSERT(0);
> -	}
>  
>  	return error;
>  }
> diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
> index 879aadff7692..db419fc862bc 100644
> --- a/fs/xfs/libxfs/xfs_log_format.h
> +++ b/fs/xfs/libxfs/xfs_log_format.h
> @@ -397,7 +397,7 @@ struct xfs_log_dinode {
>  	xfs_rfsblock_t	di_nblocks;	/* # of direct & btree blocks used */
>  	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
>  	uint32_t	di_nextents_lo;	/* number of extents in data fork */
> -	xfs_aextnum_t	di_anextents;	/* number of extents in attribute fork*/
> +	uint16_t	di_anextents_lo;/* lower part of xattr extent count */
>  	uint8_t		di_forkoff;	/* attr fork offs, <<3 for 64b align */
>  	int8_t		di_aformat;	/* format of attr fork's data */
>  	uint32_t	di_dmevmask;	/* DMIG event mask */
> @@ -415,7 +415,8 @@ struct xfs_log_dinode {
>  	uint64_t	di_flags2;	/* more random flags */
>  	uint32_t	di_cowextsize;	/* basic cow extent size for file */
>  	uint32_t	di_nextents_hi;
> -	uint8_t		di_pad2[8];	/* more padding for future expansion */
> +	uint16_t	di_anextents_hi;/* higher part of xattr extent count */
> +	uint8_t		di_pad2[6];	/* more padding for future expansion */
>  
>  	/* fields only written to during inode creation */
>  	xfs_ictimestamp_t di_crtime;	/* time created */
> diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
> index c68ff2178976..974737a9e9c1 100644
> --- a/fs/xfs/libxfs/xfs_types.h
> +++ b/fs/xfs/libxfs/xfs_types.h
> @@ -13,7 +13,7 @@ typedef uint32_t	xfs_agino_t;	/* inode # within allocation grp */
>  typedef uint32_t	xfs_extlen_t;	/* extent length in blocks */
>  typedef uint32_t	xfs_agnumber_t;	/* allocation group number */
>  typedef uint64_t	xfs_extnum_t;	/* # of extents in a file */
> -typedef int16_t		xfs_aextnum_t;	/* # extents in an attribute fork */
> +typedef uint32_t	xfs_aextnum_t;	/* # extents in an attribute fork */
>  typedef int64_t		xfs_fsize_t;	/* bytes in a file */
>  typedef uint64_t	xfs_ufsize_t;	/* unsigned bytes in a file */
>  
> @@ -62,7 +62,8 @@ typedef void *		xfs_failaddr_t;
>  #define	MAXEXTNUM31BIT	((xfs_extnum_t)0x7fffffff)	/* 31 bits */
>  #define	MAXEXTNUM	((xfs_extnum_t)0x7fffffffffff)	/* 47 bits */
>  #define	MAXDIREXTNUM	((xfs_extnum_t)0x7ffffff)	/* 27 bits */
> -#define	MAXAEXTNUM	((xfs_aextnum_t)0x7fff)		/* signed short */
> +#define	MAXAEXTNUM15BIT	((xfs_aextnum_t)0x7fff)		/* 15 bits */
> +#define	MAXAEXTNUM	((xfs_aextnum_t)0xffffffff)	/* 32 bits */
>  
>  /*
>   * Minimum and maximum blocksize and sectorsize.
> diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
> index be41fd242ff2..01e60c78a3a3 100644
> --- a/fs/xfs/scrub/inode.c
> +++ b/fs/xfs/scrub/inode.c
> @@ -371,10 +371,12 @@ xchk_dinode(
>  		break;
>  	}
>  
> +	nextents = xfs_dfork_nextents(&mp->m_sb, dip, XFS_ATTR_FORK);
> +
>  	/* di_forkoff */
>  	if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize)
>  		xchk_ino_set_corrupt(sc, ino);
> -	if (dip->di_anextents != 0 && dip->di_forkoff == 0)
> +	if (nextents != 0 && dip->di_forkoff == 0)
>  		xchk_ino_set_corrupt(sc, ino);
>  	if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS)
>  		xchk_ino_set_corrupt(sc, ino);
> @@ -386,7 +388,6 @@ xchk_dinode(
>  		xchk_ino_set_corrupt(sc, ino);
>  
>  	/* di_anextents */
> -	nextents = be16_to_cpu(dip->di_anextents);
>  	fork_recs =  XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec);
>  	switch (dip->di_aformat) {
>  	case XFS_DINODE_FMT_EXTENTS:
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 4418a66cf6d6..6ec34e069344 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -3789,6 +3789,10 @@ xfs_iflush_int(
>  		&& xfs_sb_version_has47bitext(&mp->m_sb))
>  		ip->i_d.di_flags2 |= XFS_DIFLAG2_47BIT_NEXTENTS;
>  
> +	if (!(ip->i_d.di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS)
> +		&& xfs_sb_version_has32bitaext(&mp->m_sb))
> +		ip->i_d.di_flags2 |= XFS_DIFLAG2_32BIT_ANEXTENTS;
> +
>  	/*
>  	 * Copy the dirty parts of the inode into the on-disk inode.  We always
>  	 * copy out the core of the inode, because if the inode is dirty at all
> diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
> index 6f27ac7c8631..40f0a19d1c07 100644
> --- a/fs/xfs/xfs_inode_item.c
> +++ b/fs/xfs/xfs_inode_item.c
> @@ -327,7 +327,7 @@ xfs_inode_to_log_dinode(
>  	to->di_nblocks = from->di_nblocks;
>  	to->di_extsize = from->di_extsize;
>  	to->di_nextents_lo = xfs_ifork_nextents(&ip->i_df) & 0xffffffffU;
> -	to->di_anextents = xfs_ifork_nextents(ip->i_afp);
> +	to->di_anextents_lo = xfs_ifork_nextents(ip->i_afp) & 0xffffU;
>  	to->di_forkoff = from->di_forkoff;
>  	to->di_aformat = xfs_ifork_format(ip->i_afp);
>  	to->di_dmevmask = from->di_dmevmask;
> @@ -347,6 +347,9 @@ xfs_inode_to_log_dinode(
>  		if (from->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS)
>  			to->di_nextents_hi =
>  				xfs_ifork_nextents(&ip->i_df) >> 32;
> +		if (from->di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS)
> +			to->di_anextents_hi =
> +				xfs_ifork_nextents(ip->i_afp) >> 16;
>  		to->di_ino = ip->i_ino;
>  		to->di_lsn = lsn;
>  		memset(to->di_pad2, 0, sizeof(to->di_pad2));
> diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
> index 8d64b861fb66..c8b5fbba848b 100644
> --- a/fs/xfs/xfs_inode_item_recover.c
> +++ b/fs/xfs/xfs_inode_item_recover.c
> @@ -135,6 +135,7 @@ xlog_recover_inode_commit_pass2(
>  	uint				isize;
>  	int				need_free = 0;
>  	xfs_extnum_t			nextents;
> +	xfs_aextnum_t			anextents;
>  
>  	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
>  		in_f = item->ri_buf[0].i_addr;
> @@ -262,7 +263,12 @@ xlog_recover_inode_commit_pass2(
>  		ldip->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS)
>  		nextents |= ((u64)(ldip->di_nextents_hi) << 32);
>  
> -	nextents += ldip->di_anextents;
> +	anextents = ldip->di_anextents_lo;
> +	if (xfs_sb_version_has_v3inode(&mp->m_sb) &&
> +		ldip->di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS)
> +		anextents |= ((u32)(ldip->di_anextents_hi) << 16);
> +
> +	nextents += anextents;
>  
>  	if (unlikely(nextents > ldip->di_nblocks)) {
>  		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
> -- 
> 2.20.1
>
Chandan Babu R June 9, 2020, 2:22 p.m. UTC | #2
On Monday 8 June 2020 10:51:21 PM IST Darrick J. Wong wrote:
> On Sat, Jun 06, 2020 at 01:57:45PM +0530, Chandan Babu R wrote:
> > This commit extends the per-inode attr extent counter to 32 bits.
> > 
> > The following changes are made to accomplish this,
> > 1. A new ro-compat superblock flag to prevent older kernels from
> >    mounting the filesystem in read-write mode. This flag is set for the
> >    first time when an inode would end up having more than 2^15 extents.
> > 3. Carve out a new 16-bit field from xfs_dinode->di_pad2[]. This field
> >    holds the most significant 16 bits of the attr extent counter.
> 
> How difficult is it to end up with an attr fork mapping more than 2^32
> blocks?  Supposing I have a file with nlinks==2^32-1, each mapped to a
> 255-byte name and some number of other xattrs?

- 2^32 nlinks each having 255 byte sized name.
  - Size of one xattr
    - name + value = 16 + 255 = 271
      16 comes from the size of the following structure,
      #+BEGIN_SRC fundamental
        struct xfs_parent_name_rec {
                __be64  p_ino;
                __be32  p_gen;
                __be32  p_diroffset;
        };
      #+END_SRC
  - sizeof(xfs_attr_leaf_hdr_t)
    32
  - sizeof(xfs_attr_leaf_entry_t)
    8
  - Number of entries in a 1k leaf block
    (1024 - sizeof(xfs_attr_leaf_hdr_t)) / (8 + 271)
    = (1024 - 32) / 279
    = 992 / 279
    = floor(3.55)
    = 3
  - Nr leaves = (2^32 / 3) * 3 (magicpct) = 4.3 billion
  - Nr entries per node = (1024 - sizeof(struct xfs_da3_node_hdr)) / sizeof(struct xfs_da_node_entry)
    = (1024 - 64) / 8
    = 120 entries
  - Nr entries at level (n - 1) = 4.3 billion / 120 = 36 million
  - Nr entries at level (n - 2) = 36 million / 120 = 300k
  - Nr entries at level (n - 3) = 300k / 120 = 2.5k
  - Nr entries at level (n - 4) = 2.5k / 120 = 20
  - Nr entries at level (n - 5) = 20 / 120 = 1
  Hence with 1024 block size, the maximum height (i.e. XFS_DA_NODE_MAXDEPTH)
  allowed for a dabtree would act as a limit.

  With 4k block size,
  - Number of entries in a 4k leaf block
    (4096 - sizeof(xfs_attr_leaf_hdr_t)) / (8 + 271)
    = (4096 - 32) / 279
    = 4064 / 279
    = floor(14.56)
    = 14
  - Nr leaves = (2^32 / 14) * 3 (magicpct) = 920 million
  - Nr entries per node = (4096 - sizeof(struct xfs_da3_node_hdr)) / sizeof(struct xfs_da_node_entry)
    = (4096 - 64) / 8
    = 504 entries
  - Nr entries at level (n - 1) = 920 million / 504 = 1.8 million
  - Nr entries at level (n - 2) = 1.8 million / 504 = 3.6k
  - Nr entries at level (n - 3) = 3.6k / 504 = 7
  - Nr entries at level (n - 4) = 7 / 504 = 1

  Total number of extents = 920 million + 1.8 million
  = 922 million
  < 2^32 (4.2 billion).

So we still have ample space in the 32-bit counter. 

> 
> > 2. A new inode->di_flags2 flag to indicate that the newly added field
> >    contains valid data. This flag is set when one of the following two
> >    conditions are met,
> >    - When the inode is about to have more than 2^15 extents.
> >    - When flushing the incore inode (See xfs_iflush_int()), if
> >      the superblock ro-compat flag is already set.
> > 
> > Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com>
> > ---
> >  fs/xfs/libxfs/xfs_format.h      | 25 ++++++++++---
> >  fs/xfs/libxfs/xfs_inode_buf.c   | 23 +++++++++---
> >  fs/xfs/libxfs/xfs_inode_fork.c  | 62 ++++++++++++++++++++++++++-------
> >  fs/xfs/libxfs/xfs_log_format.h  |  5 +--
> >  fs/xfs/libxfs/xfs_types.h       |  5 +--
> >  fs/xfs/scrub/inode.c            |  5 +--
> >  fs/xfs/xfs_inode.c              |  4 +++
> >  fs/xfs/xfs_inode_item.c         |  5 ++-
> >  fs/xfs/xfs_inode_item_recover.c |  8 ++++-
> >  9 files changed, 113 insertions(+), 29 deletions(-)
> > 
> > diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
> > index 91bee33aa988..2e37d887fd35 100644
> > --- a/fs/xfs/libxfs/xfs_format.h
> > +++ b/fs/xfs/libxfs/xfs_format.h
> > @@ -450,11 +450,13 @@ xfs_sb_has_compat_feature(
> >  #define XFS_SB_FEAT_RO_COMPAT_RMAPBT   (1 << 1)		/* reverse map btree */
> >  #define XFS_SB_FEAT_RO_COMPAT_REFLINK  (1 << 2)		/* reflinked files */
> >  #define XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR (1 << 3)	/* 47bit data extents */
> > +#define XFS_SB_FEAT_RO_COMPAT_32BIT_AEXT_CNTR (1 << 4)	/* 32bit attr extents */
> 
> Can we bundle both of these changes in a single feature flag?  I would
> like to keep our feature testing matrix as small as we can.
> 
> /* 64-bit data fork extent counts and 32-bit attr fork extent counts */
> #define XFS_SB_FEAT_RO_COMPAT_BIG_FORK	(1 << 4)

Sure, this should be easy to implement.

> 
> >  #define XFS_SB_FEAT_RO_COMPAT_ALL \
> >  		(XFS_SB_FEAT_RO_COMPAT_FINOBT | \
> >  		 XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
> >  		 XFS_SB_FEAT_RO_COMPAT_REFLINK | \
> > -		 XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR)
> > +		 XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR | \
> > +		 XFS_SB_FEAT_RO_COMPAT_32BIT_AEXT_CNTR)
> >  #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN	~XFS_SB_FEAT_RO_COMPAT_ALL
> >  static inline bool
> >  xfs_sb_has_ro_compat_feature(
> > @@ -577,6 +579,18 @@ static inline void xfs_sb_version_add47bitext(struct xfs_sb *sbp)
> >  	sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR;
> >  }
> >  
> > +static inline bool xfs_sb_version_has32bitaext(struct xfs_sb *sbp)
> > +{
> > +	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
> > +		(sbp->sb_features_ro_compat &
> > +			XFS_SB_FEAT_RO_COMPAT_32BIT_AEXT_CNTR);
> > +}
> > +
> > +static inline void xfs_sb_version_add32bitaext(struct xfs_sb *sbp)
> > +{
> > +	sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_32BIT_AEXT_CNTR;
> > +}
> > +
> >  /*
> >   * end of superblock version macros
> >   */
> > @@ -888,7 +902,7 @@ typedef struct xfs_dinode {
> >  	__be64		di_nblocks;	/* # of direct & btree blocks used */
> >  	__be32		di_extsize;	/* basic/minimum extent size for file */
> >  	__be32		di_nextents_lo;	/* number of extents in data fork */
> > -	__be16		di_anextents;	/* number of extents in attribute fork*/
> > +	__be16		di_anextents_lo;/* lower part of xattr extent count */
> >  	__u8		di_forkoff;	/* attr fork offs, <<3 for 64b align */
> >  	__s8		di_aformat;	/* format of attr fork's data */
> >  	__be32		di_dmevmask;	/* DMIG event mask */
> > @@ -906,7 +920,8 @@ typedef struct xfs_dinode {
> >  	__be64		di_flags2;	/* more random flags */
> >  	__be32		di_cowextsize;	/* basic cow extent size for file */
> >  	__be32		di_nextents_hi;
> > -	__u8		di_pad2[8];	/* more padding for future expansion */
> > +	__be16		di_anextents_hi;/* higher part of xattr extent count */
> > +	__u8		di_pad2[6];	/* more padding for future expansion */
> >  
> >  	/* fields only written to during inode creation */
> >  	xfs_timestamp_t	di_crtime;	/* time created */
> > @@ -1073,14 +1088,16 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
> >  #define XFS_DIFLAG2_REFLINK_BIT	1	/* file's blocks may be shared */
> >  #define XFS_DIFLAG2_COWEXTSIZE_BIT   2  /* copy on write extent size hint */
> >  #define XFS_DIFLAG2_47BIT_NEXTENTS_BIT 3 /* Uses di_nextents_hi field */
> > +#define XFS_DIFLAG2_32BIT_ANEXTENTS_BIT 4 /* Uses di_anextents_hi field  */
> 
> Same thing here.

Ok.

> 
> >  #define XFS_DIFLAG2_DAX		(1 << XFS_DIFLAG2_DAX_BIT)
> >  #define XFS_DIFLAG2_REFLINK     (1 << XFS_DIFLAG2_REFLINK_BIT)
> >  #define XFS_DIFLAG2_COWEXTSIZE  (1 << XFS_DIFLAG2_COWEXTSIZE_BIT)
> >  #define XFS_DIFLAG2_47BIT_NEXTENTS (1 << XFS_DIFLAG2_47BIT_NEXTENTS_BIT)
> > +#define XFS_DIFLAG2_32BIT_ANEXTENTS (1 << XFS_DIFLAG2_32BIT_ANEXTENTS_BIT)
> >  
> >  #define XFS_DIFLAG2_ANY \
> >  	(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
> > -	 XFS_DIFLAG2_47BIT_NEXTENTS)
> > +	 XFS_DIFLAG2_47BIT_NEXTENTS | XFS_DIFLAG2_32BIT_ANEXTENTS)
> >  
> >  /*
> >   * Inode number format:
> > diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
> > index 8b89fe080f70..285cbce0cd10 100644
> > --- a/fs/xfs/libxfs/xfs_inode_buf.c
> > +++ b/fs/xfs/libxfs/xfs_inode_buf.c
> > @@ -309,7 +309,8 @@ xfs_inode_to_disk(
> >  	to->di_extsize = cpu_to_be32(from->di_extsize);
> >  	to->di_nextents_lo = cpu_to_be32(xfs_ifork_nextents(&ip->i_df) &
> >  					0xffffffffU);
> > -	to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp));
> > +	to->di_anextents_lo = cpu_to_be16(xfs_ifork_nextents(ip->i_afp) &
> > +					0xffffU);
> >  	to->di_forkoff = from->di_forkoff;
> >  	to->di_aformat = xfs_ifork_format(ip->i_afp);
> >  	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
> > @@ -327,6 +328,10 @@ xfs_inode_to_disk(
> >  			to->di_nextents_hi
> >  				= cpu_to_be32(xfs_ifork_nextents(&ip->i_df)
> >  					>> 32);
> > +		if (from->di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS)
> > +			to->di_anextents_hi
> > +				= cpu_to_be16(xfs_ifork_nextents(ip->i_afp)
> > +					>> 16);
> >  		to->di_ino = cpu_to_be64(ip->i_ino);
> >  		to->di_lsn = cpu_to_be64(lsn);
> >  		memset(to->di_pad2, 0, sizeof(to->di_pad2));
> > @@ -366,7 +371,7 @@ xfs_log_dinode_to_disk(
> >  	to->di_nblocks = cpu_to_be64(from->di_nblocks);
> >  	to->di_extsize = cpu_to_be32(from->di_extsize);
> >  	to->di_nextents_lo = cpu_to_be32(from->di_nextents_lo);
> > -	to->di_anextents = cpu_to_be16(from->di_anextents);
> > +	to->di_anextents_lo = cpu_to_be16(from->di_anextents_lo);
> >  	to->di_forkoff = from->di_forkoff;
> >  	to->di_aformat = from->di_aformat;
> >  	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
> > @@ -383,6 +388,9 @@ xfs_log_dinode_to_disk(
> >  		if (from->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS)
> >  			to->di_nextents_hi =
> >  				cpu_to_be32(from->di_nextents_hi);
> > +		if (from->di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS)
> > +			to->di_anextents_hi =
> > +				cpu_to_be16(from->di_anextents_hi);
> >  		to->di_ino = cpu_to_be64(from->di_ino);
> >  		to->di_lsn = cpu_to_be64(from->di_lsn);
> >  		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
> > @@ -566,7 +574,7 @@ xfs_dinode_verify(
> >  		default:
> >  			return __this_address;
> >  		}
> > -		if (dip->di_anextents)
> > +		if (xfs_dfork_nextents(&mp->m_sb, dip, XFS_ATTR_FORK))
> >  			return __this_address;
> >  	}
> >  
> > @@ -745,8 +753,13 @@ xfs_dfork_nextents(
> >  			&& (dip->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS))
> >  			nextents |= (u64)(be32_to_cpu(dip->di_nextents_hi))
> >  				<< 32;
> > -		return nextents;
> >  	} else {
> > -		return be16_to_cpu(dip->di_anextents);
> > +		nextents = be16_to_cpu(dip->di_anextents_lo);
> > +		if (xfs_sb_version_has_v3inode(sbp)
> > +			&& (dip->di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS))
> > +			nextents |= (u32)(be16_to_cpu(dip->di_anextents_hi))
> 
> <same if test logic vs. if body statement indentation complaint>

Ok. I will fix this up.

> 
> > +				<< 16;
> >  	}
> > +
> > +	return nextents;
> >  }
> > diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
> > index ec682e2d5bcb..169e16947ece 100644
> > --- a/fs/xfs/libxfs/xfs_inode_fork.c
> > +++ b/fs/xfs/libxfs/xfs_inode_fork.c
> > @@ -301,7 +301,10 @@ xfs_iformat_attr_fork(
> >  	ip->i_afp->if_format = dip->di_aformat;
> >  	if (unlikely(ip->i_afp->if_format == 0)) /* pre IRIX 6.2 file system */
> >  		ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
> > -	ip->i_afp->if_nextents = be16_to_cpu(dip->di_anextents);
> > +	ip->i_afp->if_nextents = be16_to_cpu(dip->di_anextents_lo);
> > +	if (ip->i_d.di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS)
> > +		ip->i_afp->if_nextents |=
> > +			(u32)(be16_to_cpu(dip->di_anextents_hi)) << 16;
> >  
> >  	switch (ip->i_afp->if_format) {
> >  	case XFS_DINODE_FMT_LOCAL:
> > @@ -777,6 +780,48 @@ xfs_next_set_data(
> >  	return 0;
> >  }
> >  
> > +static int
> > +xfs_next_set_attr(
> > +	struct xfs_trans	*tp,
> > +	struct xfs_inode	*ip,
> > +	struct xfs_ifork	*ifp,
> > +	int			delta)
> > +{
> > +	struct xfs_mount	*mp = ip->i_mount;
> > +	xfs_aextnum_t		nr_exts;
> > +
> > +	nr_exts = ifp->if_nextents + delta;
> > +
> > +	if ((delta > 0 && nr_exts < ifp->if_nextents) ||
> > +		(delta < 0 && nr_exts > ifp->if_nextents))
> > +		return -EOVERFLOW;
> > +
> > +	if (ifp->if_nextents <= MAXAEXTNUM15BIT &&
> > +		nr_exts > MAXAEXTNUM15BIT &&
> > +		!(ip->i_d.di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS) &&
> > +		xfs_sb_version_has_v3inode(&mp->m_sb)) {
> > +		if (!xfs_sb_version_has32bitaext(&mp->m_sb)) {
> 
> Indentation complaint^2

Ok. I will fix this up.

> 
> > +			bool log_sb = false;
> > +
> > +			spin_lock(&mp->m_sb_lock);
> > +			if (!xfs_sb_version_has32bitaext(&mp->m_sb)) {
> > +				xfs_sb_version_add32bitaext(&mp->m_sb);
> > +				log_sb = true;
> > +			}
> > +			spin_unlock(&mp->m_sb_lock);
> > +
> > +			if (log_sb)
> > +				xfs_log_sb(tp);
> > +		}
> > +
> > +		ip->i_d.di_flags2 |= XFS_DIFLAG2_32BIT_ANEXTENTS;
> > +	}
> > +
> > +	ifp->if_nextents = nr_exts;
> > +
> > +	return 0;
> > +}
> > +
> >  int
> >  xfs_next_set(
> >  	struct xfs_trans	*tp,
> > @@ -785,23 +830,16 @@ xfs_next_set(
> >  	int			delta)
> >  {
> >  	struct xfs_ifork	*ifp;
> > -	int64_t			nr_exts;
> >  	int			error = 0;
> >  
> >  	ifp = XFS_IFORK_PTR(ip, whichfork);
> >  
> > -	if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
> > +	if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK)
> >  		error = xfs_next_set_data(tp, ip, ifp, delta);
> > -	} else if (whichfork == XFS_ATTR_FORK) {
> > -		nr_exts = ifp->if_nextents + delta;
> > -		if ((delta > 0 && nr_exts > MAXAEXTNUM)
> > -			|| (delta < 0 && nr_exts < 0))
> > -			return -EOVERFLOW;
> > -
> > -		ifp->if_nextents = nr_exts;
> > -	} else {
> > +	else if (whichfork == XFS_ATTR_FORK)
> > +		error = xfs_next_set_attr(tp, ip, ifp, delta);
> > +	else
> >  		ASSERT(0);
> > -	}
> >  
> >  	return error;
> >  }
> > diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
> > index 879aadff7692..db419fc862bc 100644
> > --- a/fs/xfs/libxfs/xfs_log_format.h
> > +++ b/fs/xfs/libxfs/xfs_log_format.h
> > @@ -397,7 +397,7 @@ struct xfs_log_dinode {
> >  	xfs_rfsblock_t	di_nblocks;	/* # of direct & btree blocks used */
> >  	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
> >  	uint32_t	di_nextents_lo;	/* number of extents in data fork */
> > -	xfs_aextnum_t	di_anextents;	/* number of extents in attribute fork*/
> > +	uint16_t	di_anextents_lo;/* lower part of xattr extent count */
> >  	uint8_t		di_forkoff;	/* attr fork offs, <<3 for 64b align */
> >  	int8_t		di_aformat;	/* format of attr fork's data */
> >  	uint32_t	di_dmevmask;	/* DMIG event mask */
> > @@ -415,7 +415,8 @@ struct xfs_log_dinode {
> >  	uint64_t	di_flags2;	/* more random flags */
> >  	uint32_t	di_cowextsize;	/* basic cow extent size for file */
> >  	uint32_t	di_nextents_hi;
> > -	uint8_t		di_pad2[8];	/* more padding for future expansion */
> > +	uint16_t	di_anextents_hi;/* higher part of xattr extent count */
> > +	uint8_t		di_pad2[6];	/* more padding for future expansion */
> >  
> >  	/* fields only written to during inode creation */
> >  	xfs_ictimestamp_t di_crtime;	/* time created */
> > diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
> > index c68ff2178976..974737a9e9c1 100644
> > --- a/fs/xfs/libxfs/xfs_types.h
> > +++ b/fs/xfs/libxfs/xfs_types.h
> > @@ -13,7 +13,7 @@ typedef uint32_t	xfs_agino_t;	/* inode # within allocation grp */
> >  typedef uint32_t	xfs_extlen_t;	/* extent length in blocks */
> >  typedef uint32_t	xfs_agnumber_t;	/* allocation group number */
> >  typedef uint64_t	xfs_extnum_t;	/* # of extents in a file */
> > -typedef int16_t		xfs_aextnum_t;	/* # extents in an attribute fork */
> > +typedef uint32_t	xfs_aextnum_t;	/* # extents in an attribute fork */
> >  typedef int64_t		xfs_fsize_t;	/* bytes in a file */
> >  typedef uint64_t	xfs_ufsize_t;	/* unsigned bytes in a file */
> >  
> > @@ -62,7 +62,8 @@ typedef void *		xfs_failaddr_t;
> >  #define	MAXEXTNUM31BIT	((xfs_extnum_t)0x7fffffff)	/* 31 bits */
> >  #define	MAXEXTNUM	((xfs_extnum_t)0x7fffffffffff)	/* 47 bits */
> >  #define	MAXDIREXTNUM	((xfs_extnum_t)0x7ffffff)	/* 27 bits */
> > -#define	MAXAEXTNUM	((xfs_aextnum_t)0x7fff)		/* signed short */
> > +#define	MAXAEXTNUM15BIT	((xfs_aextnum_t)0x7fff)		/* 15 bits */
> > +#define	MAXAEXTNUM	((xfs_aextnum_t)0xffffffff)	/* 32 bits */
> >  
> >  /*
> >   * Minimum and maximum blocksize and sectorsize.
> > diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
> > index be41fd242ff2..01e60c78a3a3 100644
> > --- a/fs/xfs/scrub/inode.c
> > +++ b/fs/xfs/scrub/inode.c
> > @@ -371,10 +371,12 @@ xchk_dinode(
> >  		break;
> >  	}
> >  
> > +	nextents = xfs_dfork_nextents(&mp->m_sb, dip, XFS_ATTR_FORK);
> > +
> >  	/* di_forkoff */
> >  	if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize)
> >  		xchk_ino_set_corrupt(sc, ino);
> > -	if (dip->di_anextents != 0 && dip->di_forkoff == 0)
> > +	if (nextents != 0 && dip->di_forkoff == 0)
> >  		xchk_ino_set_corrupt(sc, ino);
> >  	if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS)
> >  		xchk_ino_set_corrupt(sc, ino);
> > @@ -386,7 +388,6 @@ xchk_dinode(
> >  		xchk_ino_set_corrupt(sc, ino);
> >  
> >  	/* di_anextents */
> > -	nextents = be16_to_cpu(dip->di_anextents);
> >  	fork_recs =  XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec);
> >  	switch (dip->di_aformat) {
> >  	case XFS_DINODE_FMT_EXTENTS:
> > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> > index 4418a66cf6d6..6ec34e069344 100644
> > --- a/fs/xfs/xfs_inode.c
> > +++ b/fs/xfs/xfs_inode.c
> > @@ -3789,6 +3789,10 @@ xfs_iflush_int(
> >  		&& xfs_sb_version_has47bitext(&mp->m_sb))
> >  		ip->i_d.di_flags2 |= XFS_DIFLAG2_47BIT_NEXTENTS;
> >  
> > +	if (!(ip->i_d.di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS)
> > +		&& xfs_sb_version_has32bitaext(&mp->m_sb))
> > +		ip->i_d.di_flags2 |= XFS_DIFLAG2_32BIT_ANEXTENTS;
> > +
> >  	/*
> >  	 * Copy the dirty parts of the inode into the on-disk inode.  We always
> >  	 * copy out the core of the inode, because if the inode is dirty at all
> > diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
> > index 6f27ac7c8631..40f0a19d1c07 100644
> > --- a/fs/xfs/xfs_inode_item.c
> > +++ b/fs/xfs/xfs_inode_item.c
> > @@ -327,7 +327,7 @@ xfs_inode_to_log_dinode(
> >  	to->di_nblocks = from->di_nblocks;
> >  	to->di_extsize = from->di_extsize;
> >  	to->di_nextents_lo = xfs_ifork_nextents(&ip->i_df) & 0xffffffffU;
> > -	to->di_anextents = xfs_ifork_nextents(ip->i_afp);
> > +	to->di_anextents_lo = xfs_ifork_nextents(ip->i_afp) & 0xffffU;
> >  	to->di_forkoff = from->di_forkoff;
> >  	to->di_aformat = xfs_ifork_format(ip->i_afp);
> >  	to->di_dmevmask = from->di_dmevmask;
> > @@ -347,6 +347,9 @@ xfs_inode_to_log_dinode(
> >  		if (from->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS)
> >  			to->di_nextents_hi =
> >  				xfs_ifork_nextents(&ip->i_df) >> 32;
> > +		if (from->di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS)
> > +			to->di_anextents_hi =
> > +				xfs_ifork_nextents(ip->i_afp) >> 16;
> >  		to->di_ino = ip->i_ino;
> >  		to->di_lsn = lsn;
> >  		memset(to->di_pad2, 0, sizeof(to->di_pad2));
> > diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
> > index 8d64b861fb66..c8b5fbba848b 100644
> > --- a/fs/xfs/xfs_inode_item_recover.c
> > +++ b/fs/xfs/xfs_inode_item_recover.c
> > @@ -135,6 +135,7 @@ xlog_recover_inode_commit_pass2(
> >  	uint				isize;
> >  	int				need_free = 0;
> >  	xfs_extnum_t			nextents;
> > +	xfs_aextnum_t			anextents;
> >  
> >  	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
> >  		in_f = item->ri_buf[0].i_addr;
> > @@ -262,7 +263,12 @@ xlog_recover_inode_commit_pass2(
> >  		ldip->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS)
> >  		nextents |= ((u64)(ldip->di_nextents_hi) << 32);
> >  
> > -	nextents += ldip->di_anextents;
> > +	anextents = ldip->di_anextents_lo;
> > +	if (xfs_sb_version_has_v3inode(&mp->m_sb) &&
> > +		ldip->di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS)
> > +		anextents |= ((u32)(ldip->di_anextents_hi) << 16);
> > +
> > +	nextents += anextents;
> >  
> >  	if (unlikely(nextents > ldip->di_nblocks)) {
> >  		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
>
Christoph Hellwig June 19, 2020, 2:39 p.m. UTC | #3
On Sat, Jun 06, 2020 at 01:57:45PM +0530, Chandan Babu R wrote:
> This commit extends the per-inode attr extent counter to 32 bits.

And the reason for why this is needed or at least nice to have needs
to go here.
Chandan Babu R June 20, 2020, 12:53 p.m. UTC | #4
On Friday 19 June 2020 8:09:17 PM IST Christoph Hellwig wrote:
> On Sat, Jun 06, 2020 at 01:57:45PM +0530, Chandan Babu R wrote:
> > This commit extends the per-inode attr extent counter to 32 bits.
> 
> And the reason for why this is needed or at least nice to have needs
> to go here.
> 

Parent pointers are stored in xattrs of the corresponding inode. Dave had
informed me that there have been instances where we have more than 100 million
hardlinks associated with an inode. This will most likely cause the 16-bit
wide on-disk xattr extent counter to overflow as described below,

1. Insert 5 million xattrs (each having a value size of 255 bytes) and then
   delete 50% of them in an alternating  manner. 
   ./benchmark-xattrs -l 255 -n 5000000 -s 50 -f $mntpnt/testfile-0

   benchmark-xattrs.c and related sources can be obtained from
   https://github.com/chandanr/xfs-xattr-benchmark/blob/master/src/
   
2. This causes 98511 extents to be created in the attr fork of the inode.
   xfsaild/loop0  2035 [003]  9643.390490: probe:xfs_iflush_int: (ffffffffac6225c0) if_nextents=98511 inode=131

3. The incore inode fork extent counter is a signed 32-bit quantity. However
   the on-disk extent counter is an unsigned 16-bit quantity and hence cannot
   hold 98511 extents.

4. The following incorrect value is stored in the attr extent counter
   # xfs_db -f -c 'inode 131' -c 'print core.naextents' /dev/loop0
   core.naextents = -32561

I will add a generic description of the above sequence of events in the commit
message of this patch when posting the next version.
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 91bee33aa988..2e37d887fd35 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -450,11 +450,13 @@  xfs_sb_has_compat_feature(
 #define XFS_SB_FEAT_RO_COMPAT_RMAPBT   (1 << 1)		/* reverse map btree */
 #define XFS_SB_FEAT_RO_COMPAT_REFLINK  (1 << 2)		/* reflinked files */
 #define XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR (1 << 3)	/* 47bit data extents */
+#define XFS_SB_FEAT_RO_COMPAT_32BIT_AEXT_CNTR (1 << 4)	/* 32bit attr extents */
 #define XFS_SB_FEAT_RO_COMPAT_ALL \
 		(XFS_SB_FEAT_RO_COMPAT_FINOBT | \
 		 XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
 		 XFS_SB_FEAT_RO_COMPAT_REFLINK | \
-		 XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR)
+		 XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR | \
+		 XFS_SB_FEAT_RO_COMPAT_32BIT_AEXT_CNTR)
 #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN	~XFS_SB_FEAT_RO_COMPAT_ALL
 static inline bool
 xfs_sb_has_ro_compat_feature(
@@ -577,6 +579,18 @@  static inline void xfs_sb_version_add47bitext(struct xfs_sb *sbp)
 	sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_47BIT_DEXT_CNTR;
 }
 
+static inline bool xfs_sb_version_has32bitaext(struct xfs_sb *sbp)
+{
+	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+		(sbp->sb_features_ro_compat &
+			XFS_SB_FEAT_RO_COMPAT_32BIT_AEXT_CNTR);
+}
+
+static inline void xfs_sb_version_add32bitaext(struct xfs_sb *sbp)
+{
+	sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_32BIT_AEXT_CNTR;
+}
+
 /*
  * end of superblock version macros
  */
@@ -888,7 +902,7 @@  typedef struct xfs_dinode {
 	__be64		di_nblocks;	/* # of direct & btree blocks used */
 	__be32		di_extsize;	/* basic/minimum extent size for file */
 	__be32		di_nextents_lo;	/* number of extents in data fork */
-	__be16		di_anextents;	/* number of extents in attribute fork*/
+	__be16		di_anextents_lo;/* lower part of xattr extent count */
 	__u8		di_forkoff;	/* attr fork offs, <<3 for 64b align */
 	__s8		di_aformat;	/* format of attr fork's data */
 	__be32		di_dmevmask;	/* DMIG event mask */
@@ -906,7 +920,8 @@  typedef struct xfs_dinode {
 	__be64		di_flags2;	/* more random flags */
 	__be32		di_cowextsize;	/* basic cow extent size for file */
 	__be32		di_nextents_hi;
-	__u8		di_pad2[8];	/* more padding for future expansion */
+	__be16		di_anextents_hi;/* higher part of xattr extent count */
+	__u8		di_pad2[6];	/* more padding for future expansion */
 
 	/* fields only written to during inode creation */
 	xfs_timestamp_t	di_crtime;	/* time created */
@@ -1073,14 +1088,16 @@  static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
 #define XFS_DIFLAG2_REFLINK_BIT	1	/* file's blocks may be shared */
 #define XFS_DIFLAG2_COWEXTSIZE_BIT   2  /* copy on write extent size hint */
 #define XFS_DIFLAG2_47BIT_NEXTENTS_BIT 3 /* Uses di_nextents_hi field */
+#define XFS_DIFLAG2_32BIT_ANEXTENTS_BIT 4 /* Uses di_anextents_hi field  */
 #define XFS_DIFLAG2_DAX		(1 << XFS_DIFLAG2_DAX_BIT)
 #define XFS_DIFLAG2_REFLINK     (1 << XFS_DIFLAG2_REFLINK_BIT)
 #define XFS_DIFLAG2_COWEXTSIZE  (1 << XFS_DIFLAG2_COWEXTSIZE_BIT)
 #define XFS_DIFLAG2_47BIT_NEXTENTS (1 << XFS_DIFLAG2_47BIT_NEXTENTS_BIT)
+#define XFS_DIFLAG2_32BIT_ANEXTENTS (1 << XFS_DIFLAG2_32BIT_ANEXTENTS_BIT)
 
 #define XFS_DIFLAG2_ANY \
 	(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
-	 XFS_DIFLAG2_47BIT_NEXTENTS)
+	 XFS_DIFLAG2_47BIT_NEXTENTS | XFS_DIFLAG2_32BIT_ANEXTENTS)
 
 /*
  * Inode number format:
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 8b89fe080f70..285cbce0cd10 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -309,7 +309,8 @@  xfs_inode_to_disk(
 	to->di_extsize = cpu_to_be32(from->di_extsize);
 	to->di_nextents_lo = cpu_to_be32(xfs_ifork_nextents(&ip->i_df) &
 					0xffffffffU);
-	to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp));
+	to->di_anextents_lo = cpu_to_be16(xfs_ifork_nextents(ip->i_afp) &
+					0xffffU);
 	to->di_forkoff = from->di_forkoff;
 	to->di_aformat = xfs_ifork_format(ip->i_afp);
 	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
@@ -327,6 +328,10 @@  xfs_inode_to_disk(
 			to->di_nextents_hi
 				= cpu_to_be32(xfs_ifork_nextents(&ip->i_df)
 					>> 32);
+		if (from->di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS)
+			to->di_anextents_hi
+				= cpu_to_be16(xfs_ifork_nextents(ip->i_afp)
+					>> 16);
 		to->di_ino = cpu_to_be64(ip->i_ino);
 		to->di_lsn = cpu_to_be64(lsn);
 		memset(to->di_pad2, 0, sizeof(to->di_pad2));
@@ -366,7 +371,7 @@  xfs_log_dinode_to_disk(
 	to->di_nblocks = cpu_to_be64(from->di_nblocks);
 	to->di_extsize = cpu_to_be32(from->di_extsize);
 	to->di_nextents_lo = cpu_to_be32(from->di_nextents_lo);
-	to->di_anextents = cpu_to_be16(from->di_anextents);
+	to->di_anextents_lo = cpu_to_be16(from->di_anextents_lo);
 	to->di_forkoff = from->di_forkoff;
 	to->di_aformat = from->di_aformat;
 	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
@@ -383,6 +388,9 @@  xfs_log_dinode_to_disk(
 		if (from->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS)
 			to->di_nextents_hi =
 				cpu_to_be32(from->di_nextents_hi);
+		if (from->di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS)
+			to->di_anextents_hi =
+				cpu_to_be16(from->di_anextents_hi);
 		to->di_ino = cpu_to_be64(from->di_ino);
 		to->di_lsn = cpu_to_be64(from->di_lsn);
 		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
@@ -566,7 +574,7 @@  xfs_dinode_verify(
 		default:
 			return __this_address;
 		}
-		if (dip->di_anextents)
+		if (xfs_dfork_nextents(&mp->m_sb, dip, XFS_ATTR_FORK))
 			return __this_address;
 	}
 
@@ -745,8 +753,13 @@  xfs_dfork_nextents(
 			&& (dip->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS))
 			nextents |= (u64)(be32_to_cpu(dip->di_nextents_hi))
 				<< 32;
-		return nextents;
 	} else {
-		return be16_to_cpu(dip->di_anextents);
+		nextents = be16_to_cpu(dip->di_anextents_lo);
+		if (xfs_sb_version_has_v3inode(sbp)
+			&& (dip->di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS))
+			nextents |= (u32)(be16_to_cpu(dip->di_anextents_hi))
+				<< 16;
 	}
+
+	return nextents;
 }
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index ec682e2d5bcb..169e16947ece 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -301,7 +301,10 @@  xfs_iformat_attr_fork(
 	ip->i_afp->if_format = dip->di_aformat;
 	if (unlikely(ip->i_afp->if_format == 0)) /* pre IRIX 6.2 file system */
 		ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
-	ip->i_afp->if_nextents = be16_to_cpu(dip->di_anextents);
+	ip->i_afp->if_nextents = be16_to_cpu(dip->di_anextents_lo);
+	if (ip->i_d.di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS)
+		ip->i_afp->if_nextents |=
+			(u32)(be16_to_cpu(dip->di_anextents_hi)) << 16;
 
 	switch (ip->i_afp->if_format) {
 	case XFS_DINODE_FMT_LOCAL:
@@ -777,6 +780,48 @@  xfs_next_set_data(
 	return 0;
 }
 
+static int
+xfs_next_set_attr(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip,
+	struct xfs_ifork	*ifp,
+	int			delta)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_aextnum_t		nr_exts;
+
+	nr_exts = ifp->if_nextents + delta;
+
+	if ((delta > 0 && nr_exts < ifp->if_nextents) ||
+		(delta < 0 && nr_exts > ifp->if_nextents))
+		return -EOVERFLOW;
+
+	if (ifp->if_nextents <= MAXAEXTNUM15BIT &&
+		nr_exts > MAXAEXTNUM15BIT &&
+		!(ip->i_d.di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS) &&
+		xfs_sb_version_has_v3inode(&mp->m_sb)) {
+		if (!xfs_sb_version_has32bitaext(&mp->m_sb)) {
+			bool log_sb = false;
+
+			spin_lock(&mp->m_sb_lock);
+			if (!xfs_sb_version_has32bitaext(&mp->m_sb)) {
+				xfs_sb_version_add32bitaext(&mp->m_sb);
+				log_sb = true;
+			}
+			spin_unlock(&mp->m_sb_lock);
+
+			if (log_sb)
+				xfs_log_sb(tp);
+		}
+
+		ip->i_d.di_flags2 |= XFS_DIFLAG2_32BIT_ANEXTENTS;
+	}
+
+	ifp->if_nextents = nr_exts;
+
+	return 0;
+}
+
 int
 xfs_next_set(
 	struct xfs_trans	*tp,
@@ -785,23 +830,16 @@  xfs_next_set(
 	int			delta)
 {
 	struct xfs_ifork	*ifp;
-	int64_t			nr_exts;
 	int			error = 0;
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 
-	if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
+	if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK)
 		error = xfs_next_set_data(tp, ip, ifp, delta);
-	} else if (whichfork == XFS_ATTR_FORK) {
-		nr_exts = ifp->if_nextents + delta;
-		if ((delta > 0 && nr_exts > MAXAEXTNUM)
-			|| (delta < 0 && nr_exts < 0))
-			return -EOVERFLOW;
-
-		ifp->if_nextents = nr_exts;
-	} else {
+	else if (whichfork == XFS_ATTR_FORK)
+		error = xfs_next_set_attr(tp, ip, ifp, delta);
+	else
 		ASSERT(0);
-	}
 
 	return error;
 }
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 879aadff7692..db419fc862bc 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -397,7 +397,7 @@  struct xfs_log_dinode {
 	xfs_rfsblock_t	di_nblocks;	/* # of direct & btree blocks used */
 	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
 	uint32_t	di_nextents_lo;	/* number of extents in data fork */
-	xfs_aextnum_t	di_anextents;	/* number of extents in attribute fork*/
+	uint16_t	di_anextents_lo;/* lower part of xattr extent count */
 	uint8_t		di_forkoff;	/* attr fork offs, <<3 for 64b align */
 	int8_t		di_aformat;	/* format of attr fork's data */
 	uint32_t	di_dmevmask;	/* DMIG event mask */
@@ -415,7 +415,8 @@  struct xfs_log_dinode {
 	uint64_t	di_flags2;	/* more random flags */
 	uint32_t	di_cowextsize;	/* basic cow extent size for file */
 	uint32_t	di_nextents_hi;
-	uint8_t		di_pad2[8];	/* more padding for future expansion */
+	uint16_t	di_anextents_hi;/* higher part of xattr extent count */
+	uint8_t		di_pad2[6];	/* more padding for future expansion */
 
 	/* fields only written to during inode creation */
 	xfs_ictimestamp_t di_crtime;	/* time created */
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index c68ff2178976..974737a9e9c1 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -13,7 +13,7 @@  typedef uint32_t	xfs_agino_t;	/* inode # within allocation grp */
 typedef uint32_t	xfs_extlen_t;	/* extent length in blocks */
 typedef uint32_t	xfs_agnumber_t;	/* allocation group number */
 typedef uint64_t	xfs_extnum_t;	/* # of extents in a file */
-typedef int16_t		xfs_aextnum_t;	/* # extents in an attribute fork */
+typedef uint32_t	xfs_aextnum_t;	/* # extents in an attribute fork */
 typedef int64_t		xfs_fsize_t;	/* bytes in a file */
 typedef uint64_t	xfs_ufsize_t;	/* unsigned bytes in a file */
 
@@ -62,7 +62,8 @@  typedef void *		xfs_failaddr_t;
 #define	MAXEXTNUM31BIT	((xfs_extnum_t)0x7fffffff)	/* 31 bits */
 #define	MAXEXTNUM	((xfs_extnum_t)0x7fffffffffff)	/* 47 bits */
 #define	MAXDIREXTNUM	((xfs_extnum_t)0x7ffffff)	/* 27 bits */
-#define	MAXAEXTNUM	((xfs_aextnum_t)0x7fff)		/* signed short */
+#define	MAXAEXTNUM15BIT	((xfs_aextnum_t)0x7fff)		/* 15 bits */
+#define	MAXAEXTNUM	((xfs_aextnum_t)0xffffffff)	/* 32 bits */
 
 /*
  * Minimum and maximum blocksize and sectorsize.
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index be41fd242ff2..01e60c78a3a3 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -371,10 +371,12 @@  xchk_dinode(
 		break;
 	}
 
+	nextents = xfs_dfork_nextents(&mp->m_sb, dip, XFS_ATTR_FORK);
+
 	/* di_forkoff */
 	if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize)
 		xchk_ino_set_corrupt(sc, ino);
-	if (dip->di_anextents != 0 && dip->di_forkoff == 0)
+	if (nextents != 0 && dip->di_forkoff == 0)
 		xchk_ino_set_corrupt(sc, ino);
 	if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS)
 		xchk_ino_set_corrupt(sc, ino);
@@ -386,7 +388,6 @@  xchk_dinode(
 		xchk_ino_set_corrupt(sc, ino);
 
 	/* di_anextents */
-	nextents = be16_to_cpu(dip->di_anextents);
 	fork_recs =  XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec);
 	switch (dip->di_aformat) {
 	case XFS_DINODE_FMT_EXTENTS:
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 4418a66cf6d6..6ec34e069344 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3789,6 +3789,10 @@  xfs_iflush_int(
 		&& xfs_sb_version_has47bitext(&mp->m_sb))
 		ip->i_d.di_flags2 |= XFS_DIFLAG2_47BIT_NEXTENTS;
 
+	if (!(ip->i_d.di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS)
+		&& xfs_sb_version_has32bitaext(&mp->m_sb))
+		ip->i_d.di_flags2 |= XFS_DIFLAG2_32BIT_ANEXTENTS;
+
 	/*
 	 * Copy the dirty parts of the inode into the on-disk inode.  We always
 	 * copy out the core of the inode, because if the inode is dirty at all
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 6f27ac7c8631..40f0a19d1c07 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -327,7 +327,7 @@  xfs_inode_to_log_dinode(
 	to->di_nblocks = from->di_nblocks;
 	to->di_extsize = from->di_extsize;
 	to->di_nextents_lo = xfs_ifork_nextents(&ip->i_df) & 0xffffffffU;
-	to->di_anextents = xfs_ifork_nextents(ip->i_afp);
+	to->di_anextents_lo = xfs_ifork_nextents(ip->i_afp) & 0xffffU;
 	to->di_forkoff = from->di_forkoff;
 	to->di_aformat = xfs_ifork_format(ip->i_afp);
 	to->di_dmevmask = from->di_dmevmask;
@@ -347,6 +347,9 @@  xfs_inode_to_log_dinode(
 		if (from->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS)
 			to->di_nextents_hi =
 				xfs_ifork_nextents(&ip->i_df) >> 32;
+		if (from->di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS)
+			to->di_anextents_hi =
+				xfs_ifork_nextents(ip->i_afp) >> 16;
 		to->di_ino = ip->i_ino;
 		to->di_lsn = lsn;
 		memset(to->di_pad2, 0, sizeof(to->di_pad2));
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index 8d64b861fb66..c8b5fbba848b 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -135,6 +135,7 @@  xlog_recover_inode_commit_pass2(
 	uint				isize;
 	int				need_free = 0;
 	xfs_extnum_t			nextents;
+	xfs_aextnum_t			anextents;
 
 	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
 		in_f = item->ri_buf[0].i_addr;
@@ -262,7 +263,12 @@  xlog_recover_inode_commit_pass2(
 		ldip->di_flags2 & XFS_DIFLAG2_47BIT_NEXTENTS)
 		nextents |= ((u64)(ldip->di_nextents_hi) << 32);
 
-	nextents += ldip->di_anextents;
+	anextents = ldip->di_anextents_lo;
+	if (xfs_sb_version_has_v3inode(&mp->m_sb) &&
+		ldip->di_flags2 & XFS_DIFLAG2_32BIT_ANEXTENTS)
+		anextents |= ((u32)(ldip->di_anextents_hi) << 16);
+
+	nextents += anextents;
 
 	if (unlikely(nextents > ldip->di_nblocks)) {
 		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",