diff mbox series

[V5,12/16] xfs: Introduce per-inode 64-bit extent counters

Message ID 20220121051857.221105-13-chandan.babu@oracle.com (mailing list archive)
State Superseded
Headers show
Series xfs: Extend per-inode extent counters | expand

Commit Message

Chandan Babu R Jan. 21, 2022, 5:18 a.m. UTC
This commit introduces new fields in the on-disk inode format to support
64-bit data fork extent counters and 32-bit attribute fork extent
counters. The new fields will be used only when an inode has
XFS_DIFLAG2_NREXT64 flag set. Otherwise we continue to use the regular 32-bit
data fork extent counters and 16-bit attribute fork extent counters.

Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
Suggested-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/libxfs/xfs_format.h      | 22 +++++++--
 fs/xfs/libxfs/xfs_inode_buf.c   | 49 ++++++++++++++++++--
 fs/xfs/libxfs/xfs_inode_fork.h  |  6 +++
 fs/xfs/libxfs/xfs_log_format.h  | 22 +++++++--
 fs/xfs/xfs_inode_item.c         | 23 ++++++++--
 fs/xfs/xfs_inode_item_recover.c | 79 ++++++++++++++++++++++++++++-----
 6 files changed, 174 insertions(+), 27 deletions(-)

Comments

kernel test robot Jan. 25, 2022, 10:51 p.m. UTC | #1
Hi Chandan,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on xfs-linux/for-next]
[also build test ERROR on v5.17-rc1 next-20220125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Chandan-Babu-R/xfs-Extend-per-inode-extent-counters/20220121-132128
base:   https://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git for-next
config: arm-randconfig-c003-20220122 (https://download.01.org/0day-ci/archive/20220126/202201260622.XxiP4fe5-lkp@intel.com/config)
compiler: arm-linux-gnueabi-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/f12e8b5064fc3ef50c9d26f15f4a6984db59927c
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Chandan-Babu-R/xfs-Extend-per-inode-extent-counters/20220121-132128
        git checkout f12e8b5064fc3ef50c9d26f15f4a6984db59927c
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=arm SHELL=/bin/bash fs/xfs/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from <command-line>:
   In function 'xfs_check_ondisk_structs',
       inlined from 'init_xfs_fs' at fs/xfs/xfs_super.c:2223:2:
>> include/linux/compiler_types.h:335:45: error: call to '__compiletime_assert_900' declared with attribute error: XFS: sizeof(struct xfs_dinode) is wrong, expected 176
     335 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
         |                                             ^
   include/linux/compiler_types.h:316:25: note: in definition of macro '__compiletime_assert'
     316 |                         prefix ## suffix();                             \
         |                         ^~~~~~
   include/linux/compiler_types.h:335:9: note: in expansion of macro '_compiletime_assert'
     335 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
         |         ^~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert'
      39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
         |                                     ^~~~~~~~~~~~~~~~~~
   fs/xfs/xfs_ondisk.h:10:9: note: in expansion of macro 'BUILD_BUG_ON_MSG'
      10 |         BUILD_BUG_ON_MSG(sizeof(structname) != (size), "XFS: sizeof(" \
         |         ^~~~~~~~~~~~~~~~
   fs/xfs/xfs_ondisk.h:37:9: note: in expansion of macro 'XFS_CHECK_STRUCT_SIZE'
      37 |         XFS_CHECK_STRUCT_SIZE(struct xfs_dinode,                176);
         |         ^~~~~~~~~~~~~~~~~~~~~


vim +/__compiletime_assert_900 +335 include/linux/compiler_types.h

eb5c2d4b45e3d2 Will Deacon 2020-07-21  321  
eb5c2d4b45e3d2 Will Deacon 2020-07-21  322  #define _compiletime_assert(condition, msg, prefix, suffix) \
eb5c2d4b45e3d2 Will Deacon 2020-07-21  323  	__compiletime_assert(condition, msg, prefix, suffix)
eb5c2d4b45e3d2 Will Deacon 2020-07-21  324  
eb5c2d4b45e3d2 Will Deacon 2020-07-21  325  /**
eb5c2d4b45e3d2 Will Deacon 2020-07-21  326   * compiletime_assert - break build and emit msg if condition is false
eb5c2d4b45e3d2 Will Deacon 2020-07-21  327   * @condition: a compile-time constant condition to check
eb5c2d4b45e3d2 Will Deacon 2020-07-21  328   * @msg:       a message to emit if condition is false
eb5c2d4b45e3d2 Will Deacon 2020-07-21  329   *
eb5c2d4b45e3d2 Will Deacon 2020-07-21  330   * In tradition of POSIX assert, this macro will break the build if the
eb5c2d4b45e3d2 Will Deacon 2020-07-21  331   * supplied condition is *false*, emitting the supplied error message if the
eb5c2d4b45e3d2 Will Deacon 2020-07-21  332   * compiler has support to do so.
eb5c2d4b45e3d2 Will Deacon 2020-07-21  333   */
eb5c2d4b45e3d2 Will Deacon 2020-07-21  334  #define compiletime_assert(condition, msg) \
eb5c2d4b45e3d2 Will Deacon 2020-07-21 @335  	_compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
eb5c2d4b45e3d2 Will Deacon 2020-07-21  336  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Chandan Babu R Jan. 26, 2022, 8:50 a.m. UTC | #2
On 26 Jan 2022 at 04:21, kernel test robot wrote:
> Hi Chandan,
>
> Thank you for the patch! Yet something to improve:
>
> [auto build test ERROR on xfs-linux/for-next]
> [also build test ERROR on v5.17-rc1 next-20220125]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch]
>
> url:    https://github.com/0day-ci/linux/commits/Chandan-Babu-R/xfs-Extend-per-inode-extent-counters/20220121-132128
> base:   https://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git for-next
> config: arm-randconfig-c003-20220122 (https://download.01.org/0day-ci/archive/20220126/202201260622.XxiP4fe5-lkp@intel.com/config)
> compiler: arm-linux-gnueabi-gcc (GCC) 11.2.0
> reproduce (this is a W=1 build):
>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # https://github.com/0day-ci/linux/commit/f12e8b5064fc3ef50c9d26f15f4a6984db59927c
>         git remote add linux-review https://github.com/0day-ci/linux
>         git fetch --no-tags linux-review Chandan-Babu-R/xfs-Extend-per-inode-extent-counters/20220121-132128
>         git checkout f12e8b5064fc3ef50c9d26f15f4a6984db59927c
>         # save the config file to linux build tree
>         mkdir build_dir
>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=arm SHELL=/bin/bash fs/xfs/
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
>
> All errors (new ones prefixed by >>):
>
>    In file included from <command-line>:
>    In function 'xfs_check_ondisk_structs',
>        inlined from 'init_xfs_fs' at fs/xfs/xfs_super.c:2223:2:
>>> include/linux/compiler_types.h:335:45: error: call to '__compiletime_assert_900' declared with attribute error: XFS: sizeof(struct xfs_dinode) is wrong, expected 176
>      335 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
>          |                                             ^
>    include/linux/compiler_types.h:316:25: note: in definition of macro '__compiletime_assert'
>      316 |                         prefix ## suffix();                             \
>          |                         ^~~~~~
>    include/linux/compiler_types.h:335:9: note: in expansion of macro '_compiletime_assert'
>      335 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
>          |         ^~~~~~~~~~~~~~~~~~~
>    include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert'
>       39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
>          |                                     ^~~~~~~~~~~~~~~~~~
>    fs/xfs/xfs_ondisk.h:10:9: note: in expansion of macro 'BUILD_BUG_ON_MSG'
>       10 |         BUILD_BUG_ON_MSG(sizeof(structname) != (size), "XFS: sizeof(" \
>          |         ^~~~~~~~~~~~~~~~
>    fs/xfs/xfs_ondisk.h:37:9: note: in expansion of macro 'XFS_CHECK_STRUCT_SIZE'
>       37 |         XFS_CHECK_STRUCT_SIZE(struct xfs_dinode,                176);
>          |         ^~~~~~~~~~~~~~~~~~~~~
>

The following newly introduced union inside "struct xfs_dinode" and the
corresponding one in "struct xfs_log_dinode",
        union {
                struct {
                        __be32  di_big_anextents; /* NREXT64 attr extents */
                        __be16  di_nrext64_pad; /* NREXT64 unused, zero */
                } __packed;
                struct {
                        __be32  di_nextents;    /* !NREXT64 data extents */
                        __be16  di_anextents;   /* !NREXT64 attr extents */
                } __packed;
        };

needs to be packed as well. I will include this fix in the next version of the
patchset.

>
> vim +/__compiletime_assert_900 +335 include/linux/compiler_types.h
>
> eb5c2d4b45e3d2 Will Deacon 2020-07-21  321  
> eb5c2d4b45e3d2 Will Deacon 2020-07-21  322  #define _compiletime_assert(condition, msg, prefix, suffix) \
> eb5c2d4b45e3d2 Will Deacon 2020-07-21  323  	__compiletime_assert(condition, msg, prefix, suffix)
> eb5c2d4b45e3d2 Will Deacon 2020-07-21  324  
> eb5c2d4b45e3d2 Will Deacon 2020-07-21  325  /**
> eb5c2d4b45e3d2 Will Deacon 2020-07-21  326   * compiletime_assert - break build and emit msg if condition is false
> eb5c2d4b45e3d2 Will Deacon 2020-07-21  327   * @condition: a compile-time constant condition to check
> eb5c2d4b45e3d2 Will Deacon 2020-07-21  328   * @msg:       a message to emit if condition is false
> eb5c2d4b45e3d2 Will Deacon 2020-07-21  329   *
> eb5c2d4b45e3d2 Will Deacon 2020-07-21  330   * In tradition of POSIX assert, this macro will break the build if the
> eb5c2d4b45e3d2 Will Deacon 2020-07-21  331   * supplied condition is *false*, emitting the supplied error message if the
> eb5c2d4b45e3d2 Will Deacon 2020-07-21  332   * compiler has support to do so.
> eb5c2d4b45e3d2 Will Deacon 2020-07-21  333   */
> eb5c2d4b45e3d2 Will Deacon 2020-07-21  334  #define compiletime_assert(condition, msg) \
> eb5c2d4b45e3d2 Will Deacon 2020-07-21 @335  	_compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
> eb5c2d4b45e3d2 Will Deacon 2020-07-21  336  
>
> ---
> 0-DAY CI Kernel Test Service, Intel Corporation
> https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Darrick J. Wong Feb. 1, 2022, 6:51 p.m. UTC | #3
On Wed, Jan 26, 2022 at 02:20:43PM +0530, Chandan Babu R wrote:
> On 26 Jan 2022 at 04:21, kernel test robot wrote:
> > Hi Chandan,
> >
> > Thank you for the patch! Yet something to improve:
> >
> > [auto build test ERROR on xfs-linux/for-next]
> > [also build test ERROR on v5.17-rc1 next-20220125]
> > [If your patch is applied to the wrong git tree, kindly drop us a note.
> > And when submitting patch, we suggest to use '--base' as documented in
> > https://git-scm.com/docs/git-format-patch]
> >
> > url:    https://github.com/0day-ci/linux/commits/Chandan-Babu-R/xfs-Extend-per-inode-extent-counters/20220121-132128
> > base:   https://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git for-next
> > config: arm-randconfig-c003-20220122 (https://download.01.org/0day-ci/archive/20220126/202201260622.XxiP4fe5-lkp@intel.com/config)
> > compiler: arm-linux-gnueabi-gcc (GCC) 11.2.0
> > reproduce (this is a W=1 build):
> >         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> >         chmod +x ~/bin/make.cross
> >         # https://github.com/0day-ci/linux/commit/f12e8b5064fc3ef50c9d26f15f4a6984db59927c
> >         git remote add linux-review https://github.com/0day-ci/linux
> >         git fetch --no-tags linux-review Chandan-Babu-R/xfs-Extend-per-inode-extent-counters/20220121-132128
> >         git checkout f12e8b5064fc3ef50c9d26f15f4a6984db59927c
> >         # save the config file to linux build tree
> >         mkdir build_dir
> >         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=arm SHELL=/bin/bash fs/xfs/
> >
> > If you fix the issue, kindly add following tag as appropriate
> > Reported-by: kernel test robot <lkp@intel.com>
> >
> > All errors (new ones prefixed by >>):
> >
> >    In file included from <command-line>:
> >    In function 'xfs_check_ondisk_structs',
> >        inlined from 'init_xfs_fs' at fs/xfs/xfs_super.c:2223:2:
> >>> include/linux/compiler_types.h:335:45: error: call to '__compiletime_assert_900' declared with attribute error: XFS: sizeof(struct xfs_dinode) is wrong, expected 176
> >      335 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
> >          |                                             ^
> >    include/linux/compiler_types.h:316:25: note: in definition of macro '__compiletime_assert'
> >      316 |                         prefix ## suffix();                             \
> >          |                         ^~~~~~
> >    include/linux/compiler_types.h:335:9: note: in expansion of macro '_compiletime_assert'
> >      335 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
> >          |         ^~~~~~~~~~~~~~~~~~~
> >    include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert'
> >       39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
> >          |                                     ^~~~~~~~~~~~~~~~~~
> >    fs/xfs/xfs_ondisk.h:10:9: note: in expansion of macro 'BUILD_BUG_ON_MSG'
> >       10 |         BUILD_BUG_ON_MSG(sizeof(structname) != (size), "XFS: sizeof(" \
> >          |         ^~~~~~~~~~~~~~~~
> >    fs/xfs/xfs_ondisk.h:37:9: note: in expansion of macro 'XFS_CHECK_STRUCT_SIZE'
> >       37 |         XFS_CHECK_STRUCT_SIZE(struct xfs_dinode,                176);
> >          |         ^~~~~~~~~~~~~~~~~~~~~
> >
> 
> The following newly introduced union inside "struct xfs_dinode" and the
> corresponding one in "struct xfs_log_dinode",
>         union {
>                 struct {
>                         __be32  di_big_anextents; /* NREXT64 attr extents */
>                         __be16  di_nrext64_pad; /* NREXT64 unused, zero */
>                 } __packed;
>                 struct {
>                         __be32  di_nextents;    /* !NREXT64 data extents */
>                         __be16  di_anextents;   /* !NREXT64 attr extents */
>                 } __packed;
>         };
> 
> needs to be packed as well. I will include this fix in the next version of the
> patchset.

Eughrhrghgg I hate C sometimes.

Thank you for fixing this.

--D

> >
> > vim +/__compiletime_assert_900 +335 include/linux/compiler_types.h
> >
> > eb5c2d4b45e3d2 Will Deacon 2020-07-21  321  
> > eb5c2d4b45e3d2 Will Deacon 2020-07-21  322  #define _compiletime_assert(condition, msg, prefix, suffix) \
> > eb5c2d4b45e3d2 Will Deacon 2020-07-21  323  	__compiletime_assert(condition, msg, prefix, suffix)
> > eb5c2d4b45e3d2 Will Deacon 2020-07-21  324  
> > eb5c2d4b45e3d2 Will Deacon 2020-07-21  325  /**
> > eb5c2d4b45e3d2 Will Deacon 2020-07-21  326   * compiletime_assert - break build and emit msg if condition is false
> > eb5c2d4b45e3d2 Will Deacon 2020-07-21  327   * @condition: a compile-time constant condition to check
> > eb5c2d4b45e3d2 Will Deacon 2020-07-21  328   * @msg:       a message to emit if condition is false
> > eb5c2d4b45e3d2 Will Deacon 2020-07-21  329   *
> > eb5c2d4b45e3d2 Will Deacon 2020-07-21  330   * In tradition of POSIX assert, this macro will break the build if the
> > eb5c2d4b45e3d2 Will Deacon 2020-07-21  331   * supplied condition is *false*, emitting the supplied error message if the
> > eb5c2d4b45e3d2 Will Deacon 2020-07-21  332   * compiler has support to do so.
> > eb5c2d4b45e3d2 Will Deacon 2020-07-21  333   */
> > eb5c2d4b45e3d2 Will Deacon 2020-07-21  334  #define compiletime_assert(condition, msg) \
> > eb5c2d4b45e3d2 Will Deacon 2020-07-21 @335  	_compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
> > eb5c2d4b45e3d2 Will Deacon 2020-07-21  336  
> >
> > ---
> > 0-DAY CI Kernel Test Service, Intel Corporation
> > https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
> 
> 
> -- 
> chandan
Darrick J. Wong Feb. 1, 2022, 7:10 p.m. UTC | #4
On Fri, Jan 21, 2022 at 10:48:53AM +0530, Chandan Babu R wrote:
> This commit introduces new fields in the on-disk inode format to support
> 64-bit data fork extent counters and 32-bit attribute fork extent
> counters. The new fields will be used only when an inode has
> XFS_DIFLAG2_NREXT64 flag set. Otherwise we continue to use the regular 32-bit
> data fork extent counters and 16-bit attribute fork extent counters.
> 
> Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
> Suggested-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/libxfs/xfs_format.h      | 22 +++++++--
>  fs/xfs/libxfs/xfs_inode_buf.c   | 49 ++++++++++++++++++--
>  fs/xfs/libxfs/xfs_inode_fork.h  |  6 +++
>  fs/xfs/libxfs/xfs_log_format.h  | 22 +++++++--
>  fs/xfs/xfs_inode_item.c         | 23 ++++++++--
>  fs/xfs/xfs_inode_item_recover.c | 79 ++++++++++++++++++++++++++++-----
>  6 files changed, 174 insertions(+), 27 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
> index d3dfd45c39e0..df1d6ec39c45 100644
> --- a/fs/xfs/libxfs/xfs_format.h
> +++ b/fs/xfs/libxfs/xfs_format.h
> @@ -792,16 +792,30 @@ struct xfs_dinode {
>  	__be32		di_nlink;	/* number of links to file */
>  	__be16		di_projid_lo;	/* lower part of owner's project id */
>  	__be16		di_projid_hi;	/* higher part owner's project id */
> -	__u8		di_pad[6];	/* unused, zeroed space */
> -	__be16		di_flushiter;	/* incremented on flush */
> +	union {
> +		__be64	di_big_nextents;/* NREXT64 data extents */
> +		__u8	di_v3_pad[8];	/* !NREXT64 V3 inode zeroed space */
> +		struct {
> +			__u8	di_v2_pad[6];	/* V2 inode zeroed space */
> +			__be16	di_flushiter;	/* V2 inode incremented on flush */
> +		};
> +	};

I think it might be time to reflow part of the comments for these fields
away from inline...

	union {
		/* Number of data fork extents if NREXT64 is set */
		__be64	di_big_nextents;

		/* Padding for V3 inodes without NREXT64 set. */
		__be64	di_v3_pad;

		/* Padding and inode flush counter for V2 inodes. */
		struct {
			__u8	di_v2_pad[6];
			__be16	di_flushiter;
		};
	};

>  	xfs_timestamp_t	di_atime;	/* time last accessed */
>  	xfs_timestamp_t	di_mtime;	/* time last modified */
>  	xfs_timestamp_t	di_ctime;	/* time created/inode modified */
>  	__be64		di_size;	/* number of bytes in file */
>  	__be64		di_nblocks;	/* # of direct & btree blocks used */
>  	__be32		di_extsize;	/* basic/minimum extent size for file */
> -	__be32		di_nextents;	/* number of extents in data fork */
> -	__be16		di_anextents;	/* number of extents in attribute fork*/
> +	union {
> +		struct {
> +			__be32	di_big_anextents; /* NREXT64 attr extents */
> +			__be16	di_nrext64_pad; /* NREXT64 unused, zero */
> +		} __packed;
> +		struct {
> +			__be32	di_nextents;	/* !NREXT64 data extents */
> +			__be16	di_anextents;	/* !NREXT64 attr extents */
> +		} __packed;
> +	};


	union {
		/*
		 * For V2 inodes and V3 inodes without NREXT64 set, this
		 * is the number of data and attr fork extents.
		 */
		struct {
			__be32	di_nextents;
			__be16	di_anextents;
		} __packed;

		/* Number of attr fork extents if NREXT64 is set. */
		struct {
			__be32	di_big_anextents;
			__be16	di_nrext64_pad;
		} __packed;
	} __packed;

>  	__u8		di_forkoff;	/* attr fork offs, <<3 for 64b align */
>  	__s8		di_aformat;	/* format of attr fork's data */
>  	__be32		di_dmevmask;	/* DMIG event mask */
> diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
> index 34f360a38603..2200526bcee0 100644
> --- a/fs/xfs/libxfs/xfs_inode_buf.c
> +++ b/fs/xfs/libxfs/xfs_inode_buf.c
> @@ -279,6 +279,25 @@ xfs_inode_to_disk_ts(
>  	return ts;
>  }
>  
> +static inline void
> +xfs_inode_to_disk_iext_counters(
> +	struct xfs_inode	*ip,
> +	struct xfs_dinode	*to)
> +{
> +	if (xfs_inode_has_nrext64(ip)) {
> +		to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df));
> +		to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(ip->i_afp));
> +		/*
> +		 * We might be upgrading the inode to use larger extent counters
> +		 * than was previously used. Hence zero the unused field.
> +		 */
> +		to->di_nrext64_pad = cpu_to_be16(0);
> +	} else {
> +		to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
> +		to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp));
> +	}
> +}
> +
>  void
>  xfs_inode_to_disk(
>  	struct xfs_inode	*ip,
> @@ -296,7 +315,6 @@ xfs_inode_to_disk(
>  	to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff);
>  	to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16);
>  
> -	memset(to->di_pad, 0, sizeof(to->di_pad));
>  	to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime);
>  	to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime);
>  	to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime);
> @@ -307,8 +325,6 @@ xfs_inode_to_disk(
>  	to->di_size = cpu_to_be64(ip->i_disk_size);
>  	to->di_nblocks = cpu_to_be64(ip->i_nblocks);
>  	to->di_extsize = cpu_to_be32(ip->i_extsize);
> -	to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
> -	to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp));
>  	to->di_forkoff = ip->i_forkoff;
>  	to->di_aformat = xfs_ifork_format(ip->i_afp);
>  	to->di_flags = cpu_to_be16(ip->i_diflags);
> @@ -323,11 +339,14 @@ xfs_inode_to_disk(
>  		to->di_lsn = cpu_to_be64(lsn);
>  		memset(to->di_pad2, 0, sizeof(to->di_pad2));
>  		uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
> -		to->di_flushiter = 0;
> +		memset(to->di_v3_pad, 0, sizeof(to->di_v3_pad));
>  	} else {
>  		to->di_version = 2;
>  		to->di_flushiter = cpu_to_be16(ip->i_flushiter);
> +		memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad));
>  	}
> +
> +	xfs_inode_to_disk_iext_counters(ip, to);
>  }
>  
>  static xfs_failaddr_t
> @@ -397,6 +416,24 @@ xfs_dinode_verify_forkoff(
>  	return NULL;
>  }
>  
> +static xfs_failaddr_t
> +xfs_dinode_verify_nextents(
> +	struct xfs_mount	*mp,
> +	struct xfs_dinode	*dip)
> +{
> +	if (xfs_dinode_has_nrext64(dip)) {
> +		if (!xfs_has_nrext64(mp))
> +			return __this_address;
> +		if (dip->di_nrext64_pad != 0)
> +			return __this_address;

Don't we need to check that:

		if (xfs_dfork_data_extents(dip) > XFS_MAX_EXTCNT_DATA_FORK)
			return __this_address;
		if (xfs_dfork_attr_extents(dip) > XFS_MAX_EXTCNT_ATTR_FORK)
			return __this_address;

here?

OH, the actual checking of the extent count fields is in
xfs_dinode_verify_fork, isn't it?

I think that means this function exists to check the consistency of the
nrext64 inode flag vs. the superblock nrext64 flag and the padding
fields, right?

In which case... perhaps this should be xfs_dinode_verify_nrext64() ?

> +	} else {
> +		if (dip->di_version == 3 && dip->di_big_nextents != 0)
> +			return __this_address;

We're using tagged unions in xfs_dinode, then "di_big_nextents" is
meaningless on an inode that doesn't have NREXT64 set.  IOWs,

	} else if (dip->di_version >= 3) {
		if (dip->di_v3_pad != 0)
			return __this_address;
	}

(Note that I changed the type of di_v3_pad above.)

> +	}
> +
> +	return NULL;
> +}
> +
>  xfs_failaddr_t
>  xfs_dinode_verify(
>  	struct xfs_mount	*mp,
> @@ -440,6 +477,10 @@ xfs_dinode_verify(
>  	if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
>  		return __this_address;
>  
> +	fa = xfs_dinode_verify_nextents(mp, dip);
> +	if (fa)
> +		return fa;
> +
>  	nextents = xfs_dfork_data_extents(dip);
>  	nextents += xfs_dfork_attr_extents(dip);
>  	nblocks = be64_to_cpu(dip->di_nblocks);
> diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
> index e56803436c61..8e6221e32660 100644
> --- a/fs/xfs/libxfs/xfs_inode_fork.h
> +++ b/fs/xfs/libxfs/xfs_inode_fork.h
> @@ -156,6 +156,9 @@ static inline xfs_extnum_t
>  xfs_dfork_data_extents(
>  	struct xfs_dinode	*dip)
>  {
> +	if (xfs_dinode_has_nrext64(dip))
> +		return be64_to_cpu(dip->di_big_nextents);
> +
>  	return be32_to_cpu(dip->di_nextents);
>  }
>  
> @@ -163,6 +166,9 @@ static inline xfs_extnum_t
>  xfs_dfork_attr_extents(
>  	struct xfs_dinode	*dip)
>  {
> +	if (xfs_dinode_has_nrext64(dip))
> +		return be32_to_cpu(dip->di_big_anextents);
> +
>  	return be16_to_cpu(dip->di_anextents);
>  }
>  
> diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
> index fd66e70248f7..7f4ebf112a3c 100644
> --- a/fs/xfs/libxfs/xfs_log_format.h
> +++ b/fs/xfs/libxfs/xfs_log_format.h
> @@ -388,16 +388,30 @@ struct xfs_log_dinode {
>  	uint32_t	di_nlink;	/* number of links to file */
>  	uint16_t	di_projid_lo;	/* lower part of owner's project id */
>  	uint16_t	di_projid_hi;	/* higher part of owner's project id */
> -	uint8_t		di_pad[6];	/* unused, zeroed space */
> -	uint16_t	di_flushiter;	/* incremented on flush */
> +	union {
> +		uint64_t	di_big_nextents;/* NREXT64 data extents */
> +		uint8_t		di_v3_pad[8];	/* !NREXT64 V3 inode zeroed space */
> +		struct {
> +			uint8_t	di_v2_pad[6];	/* V2 inode zeroed space */
> +			uint16_t di_flushiter;	/* V2 inode incremented on flush */
> +		};
> +	};
>  	xfs_log_timestamp_t di_atime;	/* time last accessed */
>  	xfs_log_timestamp_t di_mtime;	/* time last modified */
>  	xfs_log_timestamp_t di_ctime;	/* time created/inode modified */
>  	xfs_fsize_t	di_size;	/* number of bytes in file */
>  	xfs_rfsblock_t	di_nblocks;	/* # of direct & btree blocks used */
>  	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
> -	uint32_t	di_nextents;	/* number of extents in data fork */
> -	uint16_t	di_anextents;	/* number of extents in attribute fork*/
> +	union {
> +		struct {
> +			uint32_t  di_big_anextents; /* NREXT64 attr extents */
> +			uint16_t  di_nrext64_pad; /* NREXT64 unused, zero */
> +		} __packed;
> +		struct {
> +			uint32_t  di_nextents;	  /* !NREXT64 data extents */
> +			uint16_t  di_anextents;	  /* !NREXT64 attr extents */
> +		} __packed;
> +	};

I think you could apply the same transformations as I did to xfs_dinode
above.

--D

>  	uint8_t		di_forkoff;	/* attr fork offs, <<3 for 64b align */
>  	int8_t		di_aformat;	/* format of attr fork's data */
>  	uint32_t	di_dmevmask;	/* DMIG event mask */
> diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
> index 90d8e591baf8..8304ce062e43 100644
> --- a/fs/xfs/xfs_inode_item.c
> +++ b/fs/xfs/xfs_inode_item.c
> @@ -358,6 +358,21 @@ xfs_copy_dm_fields_to_log_dinode(
>  	}
>  }
>  
> +static inline void
> +xfs_inode_to_log_dinode_iext_counters(
> +	struct xfs_inode	*ip,
> +	struct xfs_log_dinode	*to)
> +{
> +	if (xfs_inode_has_nrext64(ip)) {
> +		to->di_big_nextents = xfs_ifork_nextents(&ip->i_df);
> +		to->di_big_anextents = xfs_ifork_nextents(ip->i_afp);
> +		to->di_nrext64_pad = 0;
> +	} else {
> +		to->di_nextents = xfs_ifork_nextents(&ip->i_df);
> +		to->di_anextents = xfs_ifork_nextents(ip->i_afp);
> +	}
> +}
> +
>  static void
>  xfs_inode_to_log_dinode(
>  	struct xfs_inode	*ip,
> @@ -373,7 +388,6 @@ xfs_inode_to_log_dinode(
>  	to->di_projid_lo = ip->i_projid & 0xffff;
>  	to->di_projid_hi = ip->i_projid >> 16;
>  
> -	memset(to->di_pad, 0, sizeof(to->di_pad));
>  	memset(to->di_pad3, 0, sizeof(to->di_pad3));
>  	to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode->i_atime);
>  	to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode->i_mtime);
> @@ -385,8 +399,6 @@ xfs_inode_to_log_dinode(
>  	to->di_size = ip->i_disk_size;
>  	to->di_nblocks = ip->i_nblocks;
>  	to->di_extsize = ip->i_extsize;
> -	to->di_nextents = xfs_ifork_nextents(&ip->i_df);
> -	to->di_anextents = xfs_ifork_nextents(ip->i_afp);
>  	to->di_forkoff = ip->i_forkoff;
>  	to->di_aformat = xfs_ifork_format(ip->i_afp);
>  	to->di_flags = ip->i_diflags;
> @@ -406,11 +418,14 @@ xfs_inode_to_log_dinode(
>  		to->di_lsn = lsn;
>  		memset(to->di_pad2, 0, sizeof(to->di_pad2));
>  		uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
> -		to->di_flushiter = 0;
> +		memset(to->di_v3_pad, 0, sizeof(to->di_v3_pad));
>  	} else {
>  		to->di_version = 2;
>  		to->di_flushiter = ip->i_flushiter;
> +		memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad));
>  	}
> +
> +	xfs_inode_to_log_dinode_iext_counters(ip, to);
>  }
>  
>  /*
> diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
> index 767a551816a0..fa3556633ca9 100644
> --- a/fs/xfs/xfs_inode_item_recover.c
> +++ b/fs/xfs/xfs_inode_item_recover.c
> @@ -148,6 +148,22 @@ static inline bool xfs_log_dinode_has_nrext64(const struct xfs_log_dinode *ld)
>  	       (ld->di_flags2 & XFS_DIFLAG2_NREXT64);
>  }
>  
> +static inline void
> +xfs_log_dinode_to_disk_iext_counters(
> +	struct xfs_log_dinode	*from,
> +	struct xfs_dinode	*to)
> +{
> +	if (xfs_log_dinode_has_nrext64(from)) {
> +		to->di_big_nextents = cpu_to_be64(from->di_big_nextents);
> +		to->di_big_anextents = cpu_to_be32(from->di_big_anextents);
> +		to->di_nrext64_pad = cpu_to_be16(from->di_nrext64_pad);
> +	} else {
> +		to->di_nextents = cpu_to_be32(from->di_nextents);
> +		to->di_anextents = cpu_to_be16(from->di_anextents);
> +	}
> +
> +}
> +
>  STATIC void
>  xfs_log_dinode_to_disk(
>  	struct xfs_log_dinode	*from,
> @@ -164,7 +180,6 @@ xfs_log_dinode_to_disk(
>  	to->di_nlink = cpu_to_be32(from->di_nlink);
>  	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
>  	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
> -	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
>  
>  	to->di_atime = xfs_log_dinode_to_disk_ts(from, from->di_atime);
>  	to->di_mtime = xfs_log_dinode_to_disk_ts(from, from->di_mtime);
> @@ -173,8 +188,6 @@ xfs_log_dinode_to_disk(
>  	to->di_size = cpu_to_be64(from->di_size);
>  	to->di_nblocks = cpu_to_be64(from->di_nblocks);
>  	to->di_extsize = cpu_to_be32(from->di_extsize);
> -	to->di_nextents = cpu_to_be32(from->di_nextents);
> -	to->di_anextents = cpu_to_be16(from->di_anextents);
>  	to->di_forkoff = from->di_forkoff;
>  	to->di_aformat = from->di_aformat;
>  	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
> @@ -192,10 +205,13 @@ xfs_log_dinode_to_disk(
>  		to->di_lsn = cpu_to_be64(lsn);
>  		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
>  		uuid_copy(&to->di_uuid, &from->di_uuid);
> -		to->di_flushiter = 0;
> +		memcpy(to->di_v3_pad, from->di_v3_pad, sizeof(to->di_v3_pad));
>  	} else {
>  		to->di_flushiter = cpu_to_be16(from->di_flushiter);
> +		memcpy(to->di_v2_pad, from->di_v2_pad, sizeof(to->di_v2_pad));
>  	}
> +
> +	xfs_log_dinode_to_disk_iext_counters(from, to);
>  }
>  
>  STATIC int
> @@ -209,6 +225,8 @@ xlog_recover_inode_commit_pass2(
>  	struct xfs_mount		*mp = log->l_mp;
>  	struct xfs_buf			*bp;
>  	struct xfs_dinode		*dip;
> +	xfs_extnum_t                    nextents;
> +	xfs_aextnum_t                   anextents;
>  	int				len;
>  	char				*src;
>  	char				*dest;
> @@ -348,21 +366,60 @@ xlog_recover_inode_commit_pass2(
>  			goto out_release;
>  		}
>  	}
> -	if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){
> -		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
> +
> +	if (xfs_log_dinode_has_nrext64(ldip)) {
> +		if (!xfs_has_nrext64(mp) || (ldip->di_nrext64_pad != 0)) {
> +			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
> +				     XFS_ERRLEVEL_LOW, mp, ldip,
> +				     sizeof(*ldip));
> +			xfs_alert(mp,
> +				"%s: Bad inode log record, rec ptr "PTR_FMT", "
> +				"dino ptr "PTR_FMT", dino bp "PTR_FMT", "
> +				"ino %Ld, xfs_has_nrext64(mp) = %d, "
> +				"ldip->di_nrext64_pad = %u",
> +				__func__, item, dip, bp, in_f->ilf_ino,
> +				xfs_has_nrext64(mp), ldip->di_nrext64_pad);
> +			error = -EFSCORRUPTED;
> +			goto out_release;
> +		}
> +	} else {
> +		if (ldip->di_version == 3 && ldip->di_big_nextents != 0) {
> +			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
> +				     XFS_ERRLEVEL_LOW, mp, ldip,
> +				     sizeof(*ldip));
> +			xfs_alert(mp,
> +				"%s: Bad inode log record, rec ptr "PTR_FMT", "
> +				"dino ptr "PTR_FMT", dino bp "PTR_FMT", "
> +				"ino %Ld, ldip->di_big_dextcnt = %llu",
> +				__func__, item, dip, bp, in_f->ilf_ino,
> +				ldip->di_big_nextents);
> +			error = -EFSCORRUPTED;
> +			goto out_release;
> +		}
> +	}
> +
> +	if (xfs_log_dinode_has_nrext64(ldip)) {
> +		nextents = ldip->di_big_nextents;
> +		anextents = ldip->di_big_anextents;
> +	} else {
> +		nextents = ldip->di_nextents;
> +		anextents = ldip->di_anextents;
> +	}
> +
> +	if (unlikely(nextents + anextents > ldip->di_nblocks)) {
> +		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
>  				     XFS_ERRLEVEL_LOW, mp, ldip,
>  				     sizeof(*ldip));
>  		xfs_alert(mp,
>  	"%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
> -	"dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld",
> +	"dino bp "PTR_FMT", ino %Ld, total extents = %llu, nblocks = %Ld",
>  			__func__, item, dip, bp, in_f->ilf_ino,
> -			ldip->di_nextents + ldip->di_anextents,
> -			ldip->di_nblocks);
> +			nextents + anextents, ldip->di_nblocks);
>  		error = -EFSCORRUPTED;
>  		goto out_release;
>  	}
>  	if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
> -		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
> +		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(8)",
>  				     XFS_ERRLEVEL_LOW, mp, ldip,
>  				     sizeof(*ldip));
>  		xfs_alert(mp,
> @@ -374,7 +431,7 @@ xlog_recover_inode_commit_pass2(
>  	}
>  	isize = xfs_log_dinode_size(mp);
>  	if (unlikely(item->ri_buf[1].i_len > isize)) {
> -		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
> +		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(9)",
>  				     XFS_ERRLEVEL_LOW, mp, ldip,
>  				     sizeof(*ldip));
>  		xfs_alert(mp,
> -- 
> 2.30.2
>
Chandan Babu R Feb. 7, 2022, 4:54 a.m. UTC | #5
On 02 Feb 2022 at 00:40, Darrick J. Wong wrote:
> On Fri, Jan 21, 2022 at 10:48:53AM +0530, Chandan Babu R wrote:
>> This commit introduces new fields in the on-disk inode format to support
>> 64-bit data fork extent counters and 32-bit attribute fork extent
>> counters. The new fields will be used only when an inode has
>> XFS_DIFLAG2_NREXT64 flag set. Otherwise we continue to use the regular 32-bit
>> data fork extent counters and 16-bit attribute fork extent counters.
>> 
>> Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
>> Suggested-by: Dave Chinner <dchinner@redhat.com>
>> ---
>>  fs/xfs/libxfs/xfs_format.h      | 22 +++++++--
>>  fs/xfs/libxfs/xfs_inode_buf.c   | 49 ++++++++++++++++++--
>>  fs/xfs/libxfs/xfs_inode_fork.h  |  6 +++
>>  fs/xfs/libxfs/xfs_log_format.h  | 22 +++++++--
>>  fs/xfs/xfs_inode_item.c         | 23 ++++++++--
>>  fs/xfs/xfs_inode_item_recover.c | 79 ++++++++++++++++++++++++++++-----
>>  6 files changed, 174 insertions(+), 27 deletions(-)
>> 
>> diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
>> index d3dfd45c39e0..df1d6ec39c45 100644
>> --- a/fs/xfs/libxfs/xfs_format.h
>> +++ b/fs/xfs/libxfs/xfs_format.h
>> @@ -792,16 +792,30 @@ struct xfs_dinode {
>>  	__be32		di_nlink;	/* number of links to file */
>>  	__be16		di_projid_lo;	/* lower part of owner's project id */
>>  	__be16		di_projid_hi;	/* higher part owner's project id */
>> -	__u8		di_pad[6];	/* unused, zeroed space */
>> -	__be16		di_flushiter;	/* incremented on flush */
>> +	union {
>> +		__be64	di_big_nextents;/* NREXT64 data extents */
>> +		__u8	di_v3_pad[8];	/* !NREXT64 V3 inode zeroed space */
>> +		struct {
>> +			__u8	di_v2_pad[6];	/* V2 inode zeroed space */
>> +			__be16	di_flushiter;	/* V2 inode incremented on flush */
>> +		};
>> +	};
>
> I think it might be time to reflow part of the comments for these fields
> away from inline...
>
> 	union {
> 		/* Number of data fork extents if NREXT64 is set */
> 		__be64	di_big_nextents;
>
> 		/* Padding for V3 inodes without NREXT64 set. */
> 		__be64	di_v3_pad;
>
> 		/* Padding and inode flush counter for V2 inodes. */
> 		struct {
> 			__u8	di_v2_pad[6];
> 			__be16	di_flushiter;
> 		};
> 	};
>

Ok. I will include these changes in the next version.

>>  	xfs_timestamp_t	di_atime;	/* time last accessed */
>>  	xfs_timestamp_t	di_mtime;	/* time last modified */
>>  	xfs_timestamp_t	di_ctime;	/* time created/inode modified */
>>  	__be64		di_size;	/* number of bytes in file */
>>  	__be64		di_nblocks;	/* # of direct & btree blocks used */
>>  	__be32		di_extsize;	/* basic/minimum extent size for file */
>> -	__be32		di_nextents;	/* number of extents in data fork */
>> -	__be16		di_anextents;	/* number of extents in attribute fork*/
>> +	union {
>> +		struct {
>> +			__be32	di_big_anextents; /* NREXT64 attr extents */
>> +			__be16	di_nrext64_pad; /* NREXT64 unused, zero */
>> +		} __packed;
>> +		struct {
>> +			__be32	di_nextents;	/* !NREXT64 data extents */
>> +			__be16	di_anextents;	/* !NREXT64 attr extents */
>> +		} __packed;
>> +	};
>
>
> 	union {
> 		/*
> 		 * For V2 inodes and V3 inodes without NREXT64 set, this
> 		 * is the number of data and attr fork extents.
> 		 */
> 		struct {
> 			__be32	di_nextents;
> 			__be16	di_anextents;
> 		} __packed;
>
> 		/* Number of attr fork extents if NREXT64 is set. */
> 		struct {
> 			__be32	di_big_anextents;
> 			__be16	di_nrext64_pad;
> 		} __packed;
> 	} __packed;
>
>>  	__u8		di_forkoff;	/* attr fork offs, <<3 for 64b align */
>>  	__s8		di_aformat;	/* format of attr fork's data */
>>  	__be32		di_dmevmask;	/* DMIG event mask */
>> diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
>> index 34f360a38603..2200526bcee0 100644
>> --- a/fs/xfs/libxfs/xfs_inode_buf.c
>> +++ b/fs/xfs/libxfs/xfs_inode_buf.c
>> @@ -279,6 +279,25 @@ xfs_inode_to_disk_ts(
>>  	return ts;
>>  }
>>  
>> +static inline void
>> +xfs_inode_to_disk_iext_counters(
>> +	struct xfs_inode	*ip,
>> +	struct xfs_dinode	*to)
>> +{
>> +	if (xfs_inode_has_nrext64(ip)) {
>> +		to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df));
>> +		to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(ip->i_afp));
>> +		/*
>> +		 * We might be upgrading the inode to use larger extent counters
>> +		 * than was previously used. Hence zero the unused field.
>> +		 */
>> +		to->di_nrext64_pad = cpu_to_be16(0);
>> +	} else {
>> +		to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
>> +		to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp));
>> +	}
>> +}
>> +
>>  void
>>  xfs_inode_to_disk(
>>  	struct xfs_inode	*ip,
>> @@ -296,7 +315,6 @@ xfs_inode_to_disk(
>>  	to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff);
>>  	to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16);
>>  
>> -	memset(to->di_pad, 0, sizeof(to->di_pad));
>>  	to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime);
>>  	to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime);
>>  	to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime);
>> @@ -307,8 +325,6 @@ xfs_inode_to_disk(
>>  	to->di_size = cpu_to_be64(ip->i_disk_size);
>>  	to->di_nblocks = cpu_to_be64(ip->i_nblocks);
>>  	to->di_extsize = cpu_to_be32(ip->i_extsize);
>> -	to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
>> -	to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp));
>>  	to->di_forkoff = ip->i_forkoff;
>>  	to->di_aformat = xfs_ifork_format(ip->i_afp);
>>  	to->di_flags = cpu_to_be16(ip->i_diflags);
>> @@ -323,11 +339,14 @@ xfs_inode_to_disk(
>>  		to->di_lsn = cpu_to_be64(lsn);
>>  		memset(to->di_pad2, 0, sizeof(to->di_pad2));
>>  		uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
>> -		to->di_flushiter = 0;
>> +		memset(to->di_v3_pad, 0, sizeof(to->di_v3_pad));
>>  	} else {
>>  		to->di_version = 2;
>>  		to->di_flushiter = cpu_to_be16(ip->i_flushiter);
>> +		memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad));
>>  	}
>> +
>> +	xfs_inode_to_disk_iext_counters(ip, to);
>>  }
>>  
>>  static xfs_failaddr_t
>> @@ -397,6 +416,24 @@ xfs_dinode_verify_forkoff(
>>  	return NULL;
>>  }
>>  
>> +static xfs_failaddr_t
>> +xfs_dinode_verify_nextents(
>> +	struct xfs_mount	*mp,
>> +	struct xfs_dinode	*dip)
>> +{
>> +	if (xfs_dinode_has_nrext64(dip)) {
>> +		if (!xfs_has_nrext64(mp))
>> +			return __this_address;
>> +		if (dip->di_nrext64_pad != 0)
>> +			return __this_address;
>
> Don't we need to check that:
>
> 		if (xfs_dfork_data_extents(dip) > XFS_MAX_EXTCNT_DATA_FORK)
> 			return __this_address;
> 		if (xfs_dfork_attr_extents(dip) > XFS_MAX_EXTCNT_ATTR_FORK)
> 			return __this_address;
>
> here?
>
> OH, the actual checking of the extent count fields is in
> xfs_dinode_verify_fork, isn't it?
>
> I think that means this function exists to check the consistency of the
> nrext64 inode flag vs. the superblock nrext64 flag and the padding
> fields, right?
>
> In which case... perhaps this should be xfs_dinode_verify_nrext64() ?
>

You are right. The name xfs_dinode_verify_nextents implies that we are
verifying the values of extent counters. I will fix this.

>> +	} else {
>> +		if (dip->di_version == 3 && dip->di_big_nextents != 0)
>> +			return __this_address;
>
> We're using tagged unions in xfs_dinode, then "di_big_nextents" is
> meaningless on an inode that doesn't have NREXT64 set.  IOWs,
>
> 	} else if (dip->di_version >= 3) {
> 		if (dip->di_v3_pad != 0)
> 			return __this_address;
> 	}
>
> (Note that I changed the type of di_v3_pad above.)
>

Thanks for the suggestion. I will fix this as well.

>> +	}
>> +
>> +	return NULL;
>> +}
>> +
>>  xfs_failaddr_t
>>  xfs_dinode_verify(
>>  	struct xfs_mount	*mp,
>> @@ -440,6 +477,10 @@ xfs_dinode_verify(
>>  	if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
>>  		return __this_address;
>>  
>> +	fa = xfs_dinode_verify_nextents(mp, dip);
>> +	if (fa)
>> +		return fa;
>> +
>>  	nextents = xfs_dfork_data_extents(dip);
>>  	nextents += xfs_dfork_attr_extents(dip);
>>  	nblocks = be64_to_cpu(dip->di_nblocks);
>> diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
>> index e56803436c61..8e6221e32660 100644
>> --- a/fs/xfs/libxfs/xfs_inode_fork.h
>> +++ b/fs/xfs/libxfs/xfs_inode_fork.h
>> @@ -156,6 +156,9 @@ static inline xfs_extnum_t
>>  xfs_dfork_data_extents(
>>  	struct xfs_dinode	*dip)
>>  {
>> +	if (xfs_dinode_has_nrext64(dip))
>> +		return be64_to_cpu(dip->di_big_nextents);
>> +
>>  	return be32_to_cpu(dip->di_nextents);
>>  }
>>  
>> @@ -163,6 +166,9 @@ static inline xfs_extnum_t
>>  xfs_dfork_attr_extents(
>>  	struct xfs_dinode	*dip)
>>  {
>> +	if (xfs_dinode_has_nrext64(dip))
>> +		return be32_to_cpu(dip->di_big_anextents);
>> +
>>  	return be16_to_cpu(dip->di_anextents);
>>  }
>>  
>> diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
>> index fd66e70248f7..7f4ebf112a3c 100644
>> --- a/fs/xfs/libxfs/xfs_log_format.h
>> +++ b/fs/xfs/libxfs/xfs_log_format.h
>> @@ -388,16 +388,30 @@ struct xfs_log_dinode {
>>  	uint32_t	di_nlink;	/* number of links to file */
>>  	uint16_t	di_projid_lo;	/* lower part of owner's project id */
>>  	uint16_t	di_projid_hi;	/* higher part of owner's project id */
>> -	uint8_t		di_pad[6];	/* unused, zeroed space */
>> -	uint16_t	di_flushiter;	/* incremented on flush */
>> +	union {
>> +		uint64_t	di_big_nextents;/* NREXT64 data extents */
>> +		uint8_t		di_v3_pad[8];	/* !NREXT64 V3 inode zeroed space */
>> +		struct {
>> +			uint8_t	di_v2_pad[6];	/* V2 inode zeroed space */
>> +			uint16_t di_flushiter;	/* V2 inode incremented on flush */
>> +		};
>> +	};
>>  	xfs_log_timestamp_t di_atime;	/* time last accessed */
>>  	xfs_log_timestamp_t di_mtime;	/* time last modified */
>>  	xfs_log_timestamp_t di_ctime;	/* time created/inode modified */
>>  	xfs_fsize_t	di_size;	/* number of bytes in file */
>>  	xfs_rfsblock_t	di_nblocks;	/* # of direct & btree blocks used */
>>  	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
>> -	uint32_t	di_nextents;	/* number of extents in data fork */
>> -	uint16_t	di_anextents;	/* number of extents in attribute fork*/
>> +	union {
>> +		struct {
>> +			uint32_t  di_big_anextents; /* NREXT64 attr extents */
>> +			uint16_t  di_nrext64_pad; /* NREXT64 unused, zero */
>> +		} __packed;
>> +		struct {
>> +			uint32_t  di_nextents;	  /* !NREXT64 data extents */
>> +			uint16_t  di_anextents;	  /* !NREXT64 attr extents */
>> +		} __packed;
>> +	};
>
> I think you could apply the same transformations as I did to xfs_dinode
> above.
>

Sure. I will make relevant changes to xfs_log_dinode.

> --D
>
>>  	uint8_t		di_forkoff;	/* attr fork offs, <<3 for 64b align */
>>  	int8_t		di_aformat;	/* format of attr fork's data */
>>  	uint32_t	di_dmevmask;	/* DMIG event mask */
>> diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
>> index 90d8e591baf8..8304ce062e43 100644
>> --- a/fs/xfs/xfs_inode_item.c
>> +++ b/fs/xfs/xfs_inode_item.c
>> @@ -358,6 +358,21 @@ xfs_copy_dm_fields_to_log_dinode(
>>  	}
>>  }
>>  
>> +static inline void
>> +xfs_inode_to_log_dinode_iext_counters(
>> +	struct xfs_inode	*ip,
>> +	struct xfs_log_dinode	*to)
>> +{
>> +	if (xfs_inode_has_nrext64(ip)) {
>> +		to->di_big_nextents = xfs_ifork_nextents(&ip->i_df);
>> +		to->di_big_anextents = xfs_ifork_nextents(ip->i_afp);
>> +		to->di_nrext64_pad = 0;
>> +	} else {
>> +		to->di_nextents = xfs_ifork_nextents(&ip->i_df);
>> +		to->di_anextents = xfs_ifork_nextents(ip->i_afp);
>> +	}
>> +}
>> +
>>  static void
>>  xfs_inode_to_log_dinode(
>>  	struct xfs_inode	*ip,
>> @@ -373,7 +388,6 @@ xfs_inode_to_log_dinode(
>>  	to->di_projid_lo = ip->i_projid & 0xffff;
>>  	to->di_projid_hi = ip->i_projid >> 16;
>>  
>> -	memset(to->di_pad, 0, sizeof(to->di_pad));
>>  	memset(to->di_pad3, 0, sizeof(to->di_pad3));
>>  	to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode->i_atime);
>>  	to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode->i_mtime);
>> @@ -385,8 +399,6 @@ xfs_inode_to_log_dinode(
>>  	to->di_size = ip->i_disk_size;
>>  	to->di_nblocks = ip->i_nblocks;
>>  	to->di_extsize = ip->i_extsize;
>> -	to->di_nextents = xfs_ifork_nextents(&ip->i_df);
>> -	to->di_anextents = xfs_ifork_nextents(ip->i_afp);
>>  	to->di_forkoff = ip->i_forkoff;
>>  	to->di_aformat = xfs_ifork_format(ip->i_afp);
>>  	to->di_flags = ip->i_diflags;
>> @@ -406,11 +418,14 @@ xfs_inode_to_log_dinode(
>>  		to->di_lsn = lsn;
>>  		memset(to->di_pad2, 0, sizeof(to->di_pad2));
>>  		uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
>> -		to->di_flushiter = 0;
>> +		memset(to->di_v3_pad, 0, sizeof(to->di_v3_pad));
>>  	} else {
>>  		to->di_version = 2;
>>  		to->di_flushiter = ip->i_flushiter;
>> +		memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad));
>>  	}
>> +
>> +	xfs_inode_to_log_dinode_iext_counters(ip, to);
>>  }
>>  
>>  /*
>> diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
>> index 767a551816a0..fa3556633ca9 100644
>> --- a/fs/xfs/xfs_inode_item_recover.c
>> +++ b/fs/xfs/xfs_inode_item_recover.c
>> @@ -148,6 +148,22 @@ static inline bool xfs_log_dinode_has_nrext64(const struct xfs_log_dinode *ld)
>>  	       (ld->di_flags2 & XFS_DIFLAG2_NREXT64);
>>  }
>>  
>> +static inline void
>> +xfs_log_dinode_to_disk_iext_counters(
>> +	struct xfs_log_dinode	*from,
>> +	struct xfs_dinode	*to)
>> +{
>> +	if (xfs_log_dinode_has_nrext64(from)) {
>> +		to->di_big_nextents = cpu_to_be64(from->di_big_nextents);
>> +		to->di_big_anextents = cpu_to_be32(from->di_big_anextents);
>> +		to->di_nrext64_pad = cpu_to_be16(from->di_nrext64_pad);
>> +	} else {
>> +		to->di_nextents = cpu_to_be32(from->di_nextents);
>> +		to->di_anextents = cpu_to_be16(from->di_anextents);
>> +	}
>> +
>> +}
>> +
>>  STATIC void
>>  xfs_log_dinode_to_disk(
>>  	struct xfs_log_dinode	*from,
>> @@ -164,7 +180,6 @@ xfs_log_dinode_to_disk(
>>  	to->di_nlink = cpu_to_be32(from->di_nlink);
>>  	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
>>  	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
>> -	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
>>  
>>  	to->di_atime = xfs_log_dinode_to_disk_ts(from, from->di_atime);
>>  	to->di_mtime = xfs_log_dinode_to_disk_ts(from, from->di_mtime);
>> @@ -173,8 +188,6 @@ xfs_log_dinode_to_disk(
>>  	to->di_size = cpu_to_be64(from->di_size);
>>  	to->di_nblocks = cpu_to_be64(from->di_nblocks);
>>  	to->di_extsize = cpu_to_be32(from->di_extsize);
>> -	to->di_nextents = cpu_to_be32(from->di_nextents);
>> -	to->di_anextents = cpu_to_be16(from->di_anextents);
>>  	to->di_forkoff = from->di_forkoff;
>>  	to->di_aformat = from->di_aformat;
>>  	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
>> @@ -192,10 +205,13 @@ xfs_log_dinode_to_disk(
>>  		to->di_lsn = cpu_to_be64(lsn);
>>  		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
>>  		uuid_copy(&to->di_uuid, &from->di_uuid);
>> -		to->di_flushiter = 0;
>> +		memcpy(to->di_v3_pad, from->di_v3_pad, sizeof(to->di_v3_pad));
>>  	} else {
>>  		to->di_flushiter = cpu_to_be16(from->di_flushiter);
>> +		memcpy(to->di_v2_pad, from->di_v2_pad, sizeof(to->di_v2_pad));
>>  	}
>> +
>> +	xfs_log_dinode_to_disk_iext_counters(from, to);
>>  }
>>  
>>  STATIC int
>> @@ -209,6 +225,8 @@ xlog_recover_inode_commit_pass2(
>>  	struct xfs_mount		*mp = log->l_mp;
>>  	struct xfs_buf			*bp;
>>  	struct xfs_dinode		*dip;
>> +	xfs_extnum_t                    nextents;
>> +	xfs_aextnum_t                   anextents;
>>  	int				len;
>>  	char				*src;
>>  	char				*dest;
>> @@ -348,21 +366,60 @@ xlog_recover_inode_commit_pass2(
>>  			goto out_release;
>>  		}
>>  	}
>> -	if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){
>> -		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
>> +
>> +	if (xfs_log_dinode_has_nrext64(ldip)) {
>> +		if (!xfs_has_nrext64(mp) || (ldip->di_nrext64_pad != 0)) {
>> +			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
>> +				     XFS_ERRLEVEL_LOW, mp, ldip,
>> +				     sizeof(*ldip));
>> +			xfs_alert(mp,
>> +				"%s: Bad inode log record, rec ptr "PTR_FMT", "
>> +				"dino ptr "PTR_FMT", dino bp "PTR_FMT", "
>> +				"ino %Ld, xfs_has_nrext64(mp) = %d, "
>> +				"ldip->di_nrext64_pad = %u",
>> +				__func__, item, dip, bp, in_f->ilf_ino,
>> +				xfs_has_nrext64(mp), ldip->di_nrext64_pad);
>> +			error = -EFSCORRUPTED;
>> +			goto out_release;
>> +		}
>> +	} else {
>> +		if (ldip->di_version == 3 && ldip->di_big_nextents != 0) {
>> +			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
>> +				     XFS_ERRLEVEL_LOW, mp, ldip,
>> +				     sizeof(*ldip));
>> +			xfs_alert(mp,
>> +				"%s: Bad inode log record, rec ptr "PTR_FMT", "
>> +				"dino ptr "PTR_FMT", dino bp "PTR_FMT", "
>> +				"ino %Ld, ldip->di_big_dextcnt = %llu",
>> +				__func__, item, dip, bp, in_f->ilf_ino,
>> +				ldip->di_big_nextents);
>> +			error = -EFSCORRUPTED;
>> +			goto out_release;
>> +		}
>> +	}
>> +
>> +	if (xfs_log_dinode_has_nrext64(ldip)) {
>> +		nextents = ldip->di_big_nextents;
>> +		anextents = ldip->di_big_anextents;
>> +	} else {
>> +		nextents = ldip->di_nextents;
>> +		anextents = ldip->di_anextents;
>> +	}
>> +
>> +	if (unlikely(nextents + anextents > ldip->di_nblocks)) {
>> +		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
>>  				     XFS_ERRLEVEL_LOW, mp, ldip,
>>  				     sizeof(*ldip));
>>  		xfs_alert(mp,
>>  	"%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
>> -	"dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld",
>> +	"dino bp "PTR_FMT", ino %Ld, total extents = %llu, nblocks = %Ld",
>>  			__func__, item, dip, bp, in_f->ilf_ino,
>> -			ldip->di_nextents + ldip->di_anextents,
>> -			ldip->di_nblocks);
>> +			nextents + anextents, ldip->di_nblocks);
>>  		error = -EFSCORRUPTED;
>>  		goto out_release;
>>  	}
>>  	if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
>> -		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
>> +		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(8)",
>>  				     XFS_ERRLEVEL_LOW, mp, ldip,
>>  				     sizeof(*ldip));
>>  		xfs_alert(mp,
>> @@ -374,7 +431,7 @@ xlog_recover_inode_commit_pass2(
>>  	}
>>  	isize = xfs_log_dinode_size(mp);
>>  	if (unlikely(item->ri_buf[1].i_len > isize)) {
>> -		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
>> +		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(9)",
>>  				     XFS_ERRLEVEL_LOW, mp, ldip,
>>  				     sizeof(*ldip));
>>  		xfs_alert(mp,
>> -- 
>> 2.30.2
>>
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index d3dfd45c39e0..df1d6ec39c45 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -792,16 +792,30 @@  struct xfs_dinode {
 	__be32		di_nlink;	/* number of links to file */
 	__be16		di_projid_lo;	/* lower part of owner's project id */
 	__be16		di_projid_hi;	/* higher part owner's project id */
-	__u8		di_pad[6];	/* unused, zeroed space */
-	__be16		di_flushiter;	/* incremented on flush */
+	union {
+		__be64	di_big_nextents;/* NREXT64 data extents */
+		__u8	di_v3_pad[8];	/* !NREXT64 V3 inode zeroed space */
+		struct {
+			__u8	di_v2_pad[6];	/* V2 inode zeroed space */
+			__be16	di_flushiter;	/* V2 inode incremented on flush */
+		};
+	};
 	xfs_timestamp_t	di_atime;	/* time last accessed */
 	xfs_timestamp_t	di_mtime;	/* time last modified */
 	xfs_timestamp_t	di_ctime;	/* time created/inode modified */
 	__be64		di_size;	/* number of bytes in file */
 	__be64		di_nblocks;	/* # of direct & btree blocks used */
 	__be32		di_extsize;	/* basic/minimum extent size for file */
-	__be32		di_nextents;	/* number of extents in data fork */
-	__be16		di_anextents;	/* number of extents in attribute fork*/
+	union {
+		struct {
+			__be32	di_big_anextents; /* NREXT64 attr extents */
+			__be16	di_nrext64_pad; /* NREXT64 unused, zero */
+		} __packed;
+		struct {
+			__be32	di_nextents;	/* !NREXT64 data extents */
+			__be16	di_anextents;	/* !NREXT64 attr extents */
+		} __packed;
+	};
 	__u8		di_forkoff;	/* attr fork offs, <<3 for 64b align */
 	__s8		di_aformat;	/* format of attr fork's data */
 	__be32		di_dmevmask;	/* DMIG event mask */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 34f360a38603..2200526bcee0 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -279,6 +279,25 @@  xfs_inode_to_disk_ts(
 	return ts;
 }
 
+static inline void
+xfs_inode_to_disk_iext_counters(
+	struct xfs_inode	*ip,
+	struct xfs_dinode	*to)
+{
+	if (xfs_inode_has_nrext64(ip)) {
+		to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df));
+		to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(ip->i_afp));
+		/*
+		 * We might be upgrading the inode to use larger extent counters
+		 * than was previously used. Hence zero the unused field.
+		 */
+		to->di_nrext64_pad = cpu_to_be16(0);
+	} else {
+		to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
+		to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp));
+	}
+}
+
 void
 xfs_inode_to_disk(
 	struct xfs_inode	*ip,
@@ -296,7 +315,6 @@  xfs_inode_to_disk(
 	to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff);
 	to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16);
 
-	memset(to->di_pad, 0, sizeof(to->di_pad));
 	to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime);
 	to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime);
 	to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime);
@@ -307,8 +325,6 @@  xfs_inode_to_disk(
 	to->di_size = cpu_to_be64(ip->i_disk_size);
 	to->di_nblocks = cpu_to_be64(ip->i_nblocks);
 	to->di_extsize = cpu_to_be32(ip->i_extsize);
-	to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
-	to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp));
 	to->di_forkoff = ip->i_forkoff;
 	to->di_aformat = xfs_ifork_format(ip->i_afp);
 	to->di_flags = cpu_to_be16(ip->i_diflags);
@@ -323,11 +339,14 @@  xfs_inode_to_disk(
 		to->di_lsn = cpu_to_be64(lsn);
 		memset(to->di_pad2, 0, sizeof(to->di_pad2));
 		uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
-		to->di_flushiter = 0;
+		memset(to->di_v3_pad, 0, sizeof(to->di_v3_pad));
 	} else {
 		to->di_version = 2;
 		to->di_flushiter = cpu_to_be16(ip->i_flushiter);
+		memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad));
 	}
+
+	xfs_inode_to_disk_iext_counters(ip, to);
 }
 
 static xfs_failaddr_t
@@ -397,6 +416,24 @@  xfs_dinode_verify_forkoff(
 	return NULL;
 }
 
+static xfs_failaddr_t
+xfs_dinode_verify_nextents(
+	struct xfs_mount	*mp,
+	struct xfs_dinode	*dip)
+{
+	if (xfs_dinode_has_nrext64(dip)) {
+		if (!xfs_has_nrext64(mp))
+			return __this_address;
+		if (dip->di_nrext64_pad != 0)
+			return __this_address;
+	} else {
+		if (dip->di_version == 3 && dip->di_big_nextents != 0)
+			return __this_address;
+	}
+
+	return NULL;
+}
+
 xfs_failaddr_t
 xfs_dinode_verify(
 	struct xfs_mount	*mp,
@@ -440,6 +477,10 @@  xfs_dinode_verify(
 	if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
 		return __this_address;
 
+	fa = xfs_dinode_verify_nextents(mp, dip);
+	if (fa)
+		return fa;
+
 	nextents = xfs_dfork_data_extents(dip);
 	nextents += xfs_dfork_attr_extents(dip);
 	nblocks = be64_to_cpu(dip->di_nblocks);
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index e56803436c61..8e6221e32660 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -156,6 +156,9 @@  static inline xfs_extnum_t
 xfs_dfork_data_extents(
 	struct xfs_dinode	*dip)
 {
+	if (xfs_dinode_has_nrext64(dip))
+		return be64_to_cpu(dip->di_big_nextents);
+
 	return be32_to_cpu(dip->di_nextents);
 }
 
@@ -163,6 +166,9 @@  static inline xfs_extnum_t
 xfs_dfork_attr_extents(
 	struct xfs_dinode	*dip)
 {
+	if (xfs_dinode_has_nrext64(dip))
+		return be32_to_cpu(dip->di_big_anextents);
+
 	return be16_to_cpu(dip->di_anextents);
 }
 
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index fd66e70248f7..7f4ebf112a3c 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -388,16 +388,30 @@  struct xfs_log_dinode {
 	uint32_t	di_nlink;	/* number of links to file */
 	uint16_t	di_projid_lo;	/* lower part of owner's project id */
 	uint16_t	di_projid_hi;	/* higher part of owner's project id */
-	uint8_t		di_pad[6];	/* unused, zeroed space */
-	uint16_t	di_flushiter;	/* incremented on flush */
+	union {
+		uint64_t	di_big_nextents;/* NREXT64 data extents */
+		uint8_t		di_v3_pad[8];	/* !NREXT64 V3 inode zeroed space */
+		struct {
+			uint8_t	di_v2_pad[6];	/* V2 inode zeroed space */
+			uint16_t di_flushiter;	/* V2 inode incremented on flush */
+		};
+	};
 	xfs_log_timestamp_t di_atime;	/* time last accessed */
 	xfs_log_timestamp_t di_mtime;	/* time last modified */
 	xfs_log_timestamp_t di_ctime;	/* time created/inode modified */
 	xfs_fsize_t	di_size;	/* number of bytes in file */
 	xfs_rfsblock_t	di_nblocks;	/* # of direct & btree blocks used */
 	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
-	uint32_t	di_nextents;	/* number of extents in data fork */
-	uint16_t	di_anextents;	/* number of extents in attribute fork*/
+	union {
+		struct {
+			uint32_t  di_big_anextents; /* NREXT64 attr extents */
+			uint16_t  di_nrext64_pad; /* NREXT64 unused, zero */
+		} __packed;
+		struct {
+			uint32_t  di_nextents;	  /* !NREXT64 data extents */
+			uint16_t  di_anextents;	  /* !NREXT64 attr extents */
+		} __packed;
+	};
 	uint8_t		di_forkoff;	/* attr fork offs, <<3 for 64b align */
 	int8_t		di_aformat;	/* format of attr fork's data */
 	uint32_t	di_dmevmask;	/* DMIG event mask */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 90d8e591baf8..8304ce062e43 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -358,6 +358,21 @@  xfs_copy_dm_fields_to_log_dinode(
 	}
 }
 
+static inline void
+xfs_inode_to_log_dinode_iext_counters(
+	struct xfs_inode	*ip,
+	struct xfs_log_dinode	*to)
+{
+	if (xfs_inode_has_nrext64(ip)) {
+		to->di_big_nextents = xfs_ifork_nextents(&ip->i_df);
+		to->di_big_anextents = xfs_ifork_nextents(ip->i_afp);
+		to->di_nrext64_pad = 0;
+	} else {
+		to->di_nextents = xfs_ifork_nextents(&ip->i_df);
+		to->di_anextents = xfs_ifork_nextents(ip->i_afp);
+	}
+}
+
 static void
 xfs_inode_to_log_dinode(
 	struct xfs_inode	*ip,
@@ -373,7 +388,6 @@  xfs_inode_to_log_dinode(
 	to->di_projid_lo = ip->i_projid & 0xffff;
 	to->di_projid_hi = ip->i_projid >> 16;
 
-	memset(to->di_pad, 0, sizeof(to->di_pad));
 	memset(to->di_pad3, 0, sizeof(to->di_pad3));
 	to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode->i_atime);
 	to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode->i_mtime);
@@ -385,8 +399,6 @@  xfs_inode_to_log_dinode(
 	to->di_size = ip->i_disk_size;
 	to->di_nblocks = ip->i_nblocks;
 	to->di_extsize = ip->i_extsize;
-	to->di_nextents = xfs_ifork_nextents(&ip->i_df);
-	to->di_anextents = xfs_ifork_nextents(ip->i_afp);
 	to->di_forkoff = ip->i_forkoff;
 	to->di_aformat = xfs_ifork_format(ip->i_afp);
 	to->di_flags = ip->i_diflags;
@@ -406,11 +418,14 @@  xfs_inode_to_log_dinode(
 		to->di_lsn = lsn;
 		memset(to->di_pad2, 0, sizeof(to->di_pad2));
 		uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
-		to->di_flushiter = 0;
+		memset(to->di_v3_pad, 0, sizeof(to->di_v3_pad));
 	} else {
 		to->di_version = 2;
 		to->di_flushiter = ip->i_flushiter;
+		memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad));
 	}
+
+	xfs_inode_to_log_dinode_iext_counters(ip, to);
 }
 
 /*
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index 767a551816a0..fa3556633ca9 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -148,6 +148,22 @@  static inline bool xfs_log_dinode_has_nrext64(const struct xfs_log_dinode *ld)
 	       (ld->di_flags2 & XFS_DIFLAG2_NREXT64);
 }
 
+static inline void
+xfs_log_dinode_to_disk_iext_counters(
+	struct xfs_log_dinode	*from,
+	struct xfs_dinode	*to)
+{
+	if (xfs_log_dinode_has_nrext64(from)) {
+		to->di_big_nextents = cpu_to_be64(from->di_big_nextents);
+		to->di_big_anextents = cpu_to_be32(from->di_big_anextents);
+		to->di_nrext64_pad = cpu_to_be16(from->di_nrext64_pad);
+	} else {
+		to->di_nextents = cpu_to_be32(from->di_nextents);
+		to->di_anextents = cpu_to_be16(from->di_anextents);
+	}
+
+}
+
 STATIC void
 xfs_log_dinode_to_disk(
 	struct xfs_log_dinode	*from,
@@ -164,7 +180,6 @@  xfs_log_dinode_to_disk(
 	to->di_nlink = cpu_to_be32(from->di_nlink);
 	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
 	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
-	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
 
 	to->di_atime = xfs_log_dinode_to_disk_ts(from, from->di_atime);
 	to->di_mtime = xfs_log_dinode_to_disk_ts(from, from->di_mtime);
@@ -173,8 +188,6 @@  xfs_log_dinode_to_disk(
 	to->di_size = cpu_to_be64(from->di_size);
 	to->di_nblocks = cpu_to_be64(from->di_nblocks);
 	to->di_extsize = cpu_to_be32(from->di_extsize);
-	to->di_nextents = cpu_to_be32(from->di_nextents);
-	to->di_anextents = cpu_to_be16(from->di_anextents);
 	to->di_forkoff = from->di_forkoff;
 	to->di_aformat = from->di_aformat;
 	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
@@ -192,10 +205,13 @@  xfs_log_dinode_to_disk(
 		to->di_lsn = cpu_to_be64(lsn);
 		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
 		uuid_copy(&to->di_uuid, &from->di_uuid);
-		to->di_flushiter = 0;
+		memcpy(to->di_v3_pad, from->di_v3_pad, sizeof(to->di_v3_pad));
 	} else {
 		to->di_flushiter = cpu_to_be16(from->di_flushiter);
+		memcpy(to->di_v2_pad, from->di_v2_pad, sizeof(to->di_v2_pad));
 	}
+
+	xfs_log_dinode_to_disk_iext_counters(from, to);
 }
 
 STATIC int
@@ -209,6 +225,8 @@  xlog_recover_inode_commit_pass2(
 	struct xfs_mount		*mp = log->l_mp;
 	struct xfs_buf			*bp;
 	struct xfs_dinode		*dip;
+	xfs_extnum_t                    nextents;
+	xfs_aextnum_t                   anextents;
 	int				len;
 	char				*src;
 	char				*dest;
@@ -348,21 +366,60 @@  xlog_recover_inode_commit_pass2(
 			goto out_release;
 		}
 	}
-	if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){
-		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
+
+	if (xfs_log_dinode_has_nrext64(ldip)) {
+		if (!xfs_has_nrext64(mp) || (ldip->di_nrext64_pad != 0)) {
+			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
+				     XFS_ERRLEVEL_LOW, mp, ldip,
+				     sizeof(*ldip));
+			xfs_alert(mp,
+				"%s: Bad inode log record, rec ptr "PTR_FMT", "
+				"dino ptr "PTR_FMT", dino bp "PTR_FMT", "
+				"ino %Ld, xfs_has_nrext64(mp) = %d, "
+				"ldip->di_nrext64_pad = %u",
+				__func__, item, dip, bp, in_f->ilf_ino,
+				xfs_has_nrext64(mp), ldip->di_nrext64_pad);
+			error = -EFSCORRUPTED;
+			goto out_release;
+		}
+	} else {
+		if (ldip->di_version == 3 && ldip->di_big_nextents != 0) {
+			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
+				     XFS_ERRLEVEL_LOW, mp, ldip,
+				     sizeof(*ldip));
+			xfs_alert(mp,
+				"%s: Bad inode log record, rec ptr "PTR_FMT", "
+				"dino ptr "PTR_FMT", dino bp "PTR_FMT", "
+				"ino %Ld, ldip->di_big_dextcnt = %llu",
+				__func__, item, dip, bp, in_f->ilf_ino,
+				ldip->di_big_nextents);
+			error = -EFSCORRUPTED;
+			goto out_release;
+		}
+	}
+
+	if (xfs_log_dinode_has_nrext64(ldip)) {
+		nextents = ldip->di_big_nextents;
+		anextents = ldip->di_big_anextents;
+	} else {
+		nextents = ldip->di_nextents;
+		anextents = ldip->di_anextents;
+	}
+
+	if (unlikely(nextents + anextents > ldip->di_nblocks)) {
+		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
 				     XFS_ERRLEVEL_LOW, mp, ldip,
 				     sizeof(*ldip));
 		xfs_alert(mp,
 	"%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
-	"dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld",
+	"dino bp "PTR_FMT", ino %Ld, total extents = %llu, nblocks = %Ld",
 			__func__, item, dip, bp, in_f->ilf_ino,
-			ldip->di_nextents + ldip->di_anextents,
-			ldip->di_nblocks);
+			nextents + anextents, ldip->di_nblocks);
 		error = -EFSCORRUPTED;
 		goto out_release;
 	}
 	if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
-		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
+		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(8)",
 				     XFS_ERRLEVEL_LOW, mp, ldip,
 				     sizeof(*ldip));
 		xfs_alert(mp,
@@ -374,7 +431,7 @@  xlog_recover_inode_commit_pass2(
 	}
 	isize = xfs_log_dinode_size(mp);
 	if (unlikely(item->ri_buf[1].i_len > isize)) {
-		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
+		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(9)",
 				     XFS_ERRLEVEL_LOW, mp, ldip,
 				     sizeof(*ldip));
 		xfs_alert(mp,