diff mbox series

[V5,2/2] common/populate: Ensure that S_IFDIR.FMT_BTREE is in btree format

Message ID 20221208072843.1866615-3-ZiyangZhang@linux.alibaba.com (mailing list archive)
State Superseded
Headers show
Series cleanup and bugfix for xfs tests related to btree format | expand

Commit Message

Ziyang Zhang Dec. 8, 2022, 7:28 a.m. UTC
Sometimes "$((128 * dblksz / 40))" dirents cannot make sure that
S_IFDIR.FMT_BTREE could become btree format for its DATA fork.

Actually we just observed it can fail after apply our inode
extent-to-btree workaround. The root cause is that the kernel may be
too good at allocating consecutive blocks so that the data fork is
still in extents format.

Therefore instead of using a fixed number, let's make sure the number
of extents is large enough than (inode size - inode core size) /
sizeof(xfs_bmbt_rec_t).

Reviewed-by: Zorro Lang <zlang@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Suggested-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Signed-off-by: Ziyang Zhang <ZiyangZhang@linux.alibaba.com>
---
 common/populate | 34 +++++++++++++++++++++++++++++++++-
 common/xfs      |  9 +++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)

Comments

Darrick J. Wong Dec. 9, 2022, 4:37 p.m. UTC | #1
On Thu, Dec 08, 2022 at 03:28:43PM +0800, Ziyang Zhang wrote:
> Sometimes "$((128 * dblksz / 40))" dirents cannot make sure that
> S_IFDIR.FMT_BTREE could become btree format for its DATA fork.
> 
> Actually we just observed it can fail after apply our inode
> extent-to-btree workaround. The root cause is that the kernel may be
> too good at allocating consecutive blocks so that the data fork is
> still in extents format.
> 
> Therefore instead of using a fixed number, let's make sure the number
> of extents is large enough than (inode size - inode core size) /
> sizeof(xfs_bmbt_rec_t).
> 
> Reviewed-by: Zorro Lang <zlang@redhat.com>
> Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
> Suggested-by: "Darrick J. Wong" <djwong@kernel.org>
> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> Signed-off-by: Ziyang Zhang <ZiyangZhang@linux.alibaba.com>
> ---
>  common/populate | 34 +++++++++++++++++++++++++++++++++-
>  common/xfs      |  9 +++++++++
>  2 files changed, 42 insertions(+), 1 deletion(-)
> 
> diff --git a/common/populate b/common/populate
> index 6e004997..0d334a13 100644
> --- a/common/populate
> +++ b/common/populate
> @@ -71,6 +71,37 @@ __populate_create_dir() {
>  	done
>  }
>  
> +# Create a large directory and ensure that it's a btree format
> +__populate_xfs_create_btree_dir() {
> +	local name="$1"
> +	local isize="$2"
> +	local missing="$3"
> +	local icore_size="$(_xfs_inode_core_bytes)"

Doesn't this helper require a path argument now?

--D

> +	# We need enough extents to guarantee that the data fork is in
> +	# btree format.  Cycling the mount to use xfs_db is too slow, so
> +	# watch for when the extent count exceeds the space after the
> +	# inode core.
> +	local max_nextents="$(((isize - icore_size) / 16))"
> +	local nr=0
> +
> +	mkdir -p "${name}"
> +	while true; do
> +		local creat=mkdir
> +		test "$((nr % 20))" -eq 0 && creat=touch
> +		$creat "${name}/$(printf "%.08d" "$nr")"
> +		if [ "$((nr % 40))" -eq 0 ]; then
> +			local nextents="$(_xfs_get_fsxattr nextents $name)"
> +			[ $nextents -gt $max_nextents ] && break
> +		fi
> +		nr=$((nr+1))
> +	done
> +
> +	test -z "${missing}" && return
> +	seq 1 2 "${nr}" | while read d; do
> +		rm -rf "${name}/$(printf "%.08d" "$d")"
> +	done
> +}
> +
>  # Add a bunch of attrs to a file
>  __populate_create_attr() {
>  	name="$1"
> @@ -176,6 +207,7 @@ _scratch_xfs_populate() {
>  
>  	blksz="$(stat -f -c '%s' "${SCRATCH_MNT}")"
>  	dblksz="$(_xfs_get_dir_blocksize "$SCRATCH_MNT")"
> +	isize="$(_xfs_get_inode_size "$SCRATCH_MNT")"
>  	crc="$(_xfs_has_feature "$SCRATCH_MNT" crc -v)"
>  	if [ $crc -eq 1 ]; then
>  		leaf_hdr_size=64
> @@ -226,7 +258,7 @@ _scratch_xfs_populate() {
>  
>  	# - BTREE
>  	echo "+ btree dir"
> -	__populate_create_dir "${SCRATCH_MNT}/S_IFDIR.FMT_BTREE" "$((128 * dblksz / 40))" true
> +	__populate_xfs_create_btree_dir "${SCRATCH_MNT}/S_IFDIR.FMT_BTREE" "$isize" true
>  
>  	# Symlinks
>  	# - FMT_LOCAL
> diff --git a/common/xfs b/common/xfs
> index 674384a9..7aaa63c7 100644
> --- a/common/xfs
> +++ b/common/xfs
> @@ -1487,6 +1487,15 @@ _require_xfsrestore_xflag()
>  			_notrun 'xfsrestore does not support -x flag.'
>  }
>  
> +# Number of bytes reserved for a full inode record, which includes the
> +# immediate fork areas.
> +_xfs_get_inode_size()
> +{
> +	local mntpoint="$1"
> +
> +	$XFS_INFO_PROG "$mntpoint" | sed -n '/meta-data=.*isize/s/^.*isize=\([0-9]*\).*$/\1/p'
> +}
> +
>  # Number of bytes reserved for only the inode record, excluding the
>  # immediate fork areas.
>  _xfs_get_inode_core_bytes()
> -- 
> 2.18.4
>
Zorro Lang Dec. 10, 2022, 1:57 p.m. UTC | #2
On Fri, Dec 09, 2022 at 08:37:41AM -0800, Darrick J. Wong wrote:
> On Thu, Dec 08, 2022 at 03:28:43PM +0800, Ziyang Zhang wrote:
> > Sometimes "$((128 * dblksz / 40))" dirents cannot make sure that
> > S_IFDIR.FMT_BTREE could become btree format for its DATA fork.
> > 
> > Actually we just observed it can fail after apply our inode
> > extent-to-btree workaround. The root cause is that the kernel may be
> > too good at allocating consecutive blocks so that the data fork is
> > still in extents format.
> > 
> > Therefore instead of using a fixed number, let's make sure the number
> > of extents is large enough than (inode size - inode core size) /
> > sizeof(xfs_bmbt_rec_t).
> > 
> > Reviewed-by: Zorro Lang <zlang@redhat.com>
> > Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
> > Suggested-by: "Darrick J. Wong" <djwong@kernel.org>
> > Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> > Signed-off-by: Ziyang Zhang <ZiyangZhang@linux.alibaba.com>
> > ---
> >  common/populate | 34 +++++++++++++++++++++++++++++++++-
> >  common/xfs      |  9 +++++++++
> >  2 files changed, 42 insertions(+), 1 deletion(-)
> > 
> > diff --git a/common/populate b/common/populate
> > index 6e004997..0d334a13 100644
> > --- a/common/populate
> > +++ b/common/populate
> > @@ -71,6 +71,37 @@ __populate_create_dir() {
> >  	done
> >  }
> >  
> > +# Create a large directory and ensure that it's a btree format
> > +__populate_xfs_create_btree_dir() {
> > +	local name="$1"
> > +	local isize="$2"
> > +	local missing="$3"
> > +	local icore_size="$(_xfs_inode_core_bytes)"
> 
> Doesn't this helper require a path argument now?

What kind of "path" argument? I think he copy it from __populate_create_dir(),
and keep using the "name" as the root path to create files/dir.

> 
> --D
> 
> > +	# We need enough extents to guarantee that the data fork is in
> > +	# btree format.  Cycling the mount to use xfs_db is too slow, so
> > +	# watch for when the extent count exceeds the space after the
> > +	# inode core.
> > +	local max_nextents="$(((isize - icore_size) / 16))"
> > +	local nr=0
> > +
> > +	mkdir -p "${name}"
> > +	while true; do
> > +		local creat=mkdir
> > +		test "$((nr % 20))" -eq 0 && creat=touch
> > +		$creat "${name}/$(printf "%.08d" "$nr")"
> > +		if [ "$((nr % 40))" -eq 0 ]; then
> > +			local nextents="$(_xfs_get_fsxattr nextents $name)"
> > +			[ $nextents -gt $max_nextents ] && break
> > +		fi
> > +		nr=$((nr+1))
> > +	done
> > +
> > +	test -z "${missing}" && return
> > +	seq 1 2 "${nr}" | while read d; do
> > +		rm -rf "${name}/$(printf "%.08d" "$d")"
> > +	done
> > +}
> > +
> >  # Add a bunch of attrs to a file
> >  __populate_create_attr() {
> >  	name="$1"
> > @@ -176,6 +207,7 @@ _scratch_xfs_populate() {
> >  
> >  	blksz="$(stat -f -c '%s' "${SCRATCH_MNT}")"
> >  	dblksz="$(_xfs_get_dir_blocksize "$SCRATCH_MNT")"
> > +	isize="$(_xfs_get_inode_size "$SCRATCH_MNT")"
> >  	crc="$(_xfs_has_feature "$SCRATCH_MNT" crc -v)"
> >  	if [ $crc -eq 1 ]; then
> >  		leaf_hdr_size=64
> > @@ -226,7 +258,7 @@ _scratch_xfs_populate() {
> >  
> >  	# - BTREE
> >  	echo "+ btree dir"
> > -	__populate_create_dir "${SCRATCH_MNT}/S_IFDIR.FMT_BTREE" "$((128 * dblksz / 40))" true
> > +	__populate_xfs_create_btree_dir "${SCRATCH_MNT}/S_IFDIR.FMT_BTREE" "$isize" true
> >  
> >  	# Symlinks
> >  	# - FMT_LOCAL
> > diff --git a/common/xfs b/common/xfs
> > index 674384a9..7aaa63c7 100644
> > --- a/common/xfs
> > +++ b/common/xfs
> > @@ -1487,6 +1487,15 @@ _require_xfsrestore_xflag()
> >  			_notrun 'xfsrestore does not support -x flag.'
> >  }
> >  
> > +# Number of bytes reserved for a full inode record, which includes the
> > +# immediate fork areas.
> > +_xfs_get_inode_size()
> > +{
> > +	local mntpoint="$1"
> > +
> > +	$XFS_INFO_PROG "$mntpoint" | sed -n '/meta-data=.*isize/s/^.*isize=\([0-9]*\).*$/\1/p'
> > +}
> > +
> >  # Number of bytes reserved for only the inode record, excluding the
> >  # immediate fork areas.
> >  _xfs_get_inode_core_bytes()
> > -- 
> > 2.18.4
> > 
>
Darrick J. Wong Dec. 11, 2022, 2:30 a.m. UTC | #3
On Sat, Dec 10, 2022 at 09:57:24PM +0800, Zorro Lang wrote:
> On Fri, Dec 09, 2022 at 08:37:41AM -0800, Darrick J. Wong wrote:
> > On Thu, Dec 08, 2022 at 03:28:43PM +0800, Ziyang Zhang wrote:
> > > Sometimes "$((128 * dblksz / 40))" dirents cannot make sure that
> > > S_IFDIR.FMT_BTREE could become btree format for its DATA fork.
> > > 
> > > Actually we just observed it can fail after apply our inode
> > > extent-to-btree workaround. The root cause is that the kernel may be
> > > too good at allocating consecutive blocks so that the data fork is
> > > still in extents format.
> > > 
> > > Therefore instead of using a fixed number, let's make sure the number
> > > of extents is large enough than (inode size - inode core size) /
> > > sizeof(xfs_bmbt_rec_t).
> > > 
> > > Reviewed-by: Zorro Lang <zlang@redhat.com>
> > > Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
> > > Suggested-by: "Darrick J. Wong" <djwong@kernel.org>
> > > Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> > > Signed-off-by: Ziyang Zhang <ZiyangZhang@linux.alibaba.com>
> > > ---
> > >  common/populate | 34 +++++++++++++++++++++++++++++++++-
> > >  common/xfs      |  9 +++++++++
> > >  2 files changed, 42 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/common/populate b/common/populate
> > > index 6e004997..0d334a13 100644
> > > --- a/common/populate
> > > +++ b/common/populate
> > > @@ -71,6 +71,37 @@ __populate_create_dir() {
> > >  	done
> > >  }
> > >  
> > > +# Create a large directory and ensure that it's a btree format
> > > +__populate_xfs_create_btree_dir() {
> > > +	local name="$1"
> > > +	local isize="$2"
> > > +	local missing="$3"
> > > +	local icore_size="$(_xfs_inode_core_bytes)"
> > 
> > Doesn't this helper require a path argument now?
> 
> What kind of "path" argument? I think he copy it from __populate_create_dir(),
> and keep using the "name" as the root path to create files/dir.

This path argument, from
https://lore.kernel.org/fstests/20221208072843.1866615-2-ZiyangZhang@linux.alibaba.com/T/#u

+# Number of bytes reserved for only the inode record, excluding the
+# immediate fork areas.
+_xfs_get_inode_core_bytes()
+{
+	local dir="$1"
+
+	if _xfs_has_feature "$dir" crc; then

--D

> > 
> > --D
> > 
> > > +	# We need enough extents to guarantee that the data fork is in
> > > +	# btree format.  Cycling the mount to use xfs_db is too slow, so
> > > +	# watch for when the extent count exceeds the space after the
> > > +	# inode core.
> > > +	local max_nextents="$(((isize - icore_size) / 16))"
> > > +	local nr=0
> > > +
> > > +	mkdir -p "${name}"
> > > +	while true; do
> > > +		local creat=mkdir
> > > +		test "$((nr % 20))" -eq 0 && creat=touch
> > > +		$creat "${name}/$(printf "%.08d" "$nr")"
> > > +		if [ "$((nr % 40))" -eq 0 ]; then
> > > +			local nextents="$(_xfs_get_fsxattr nextents $name)"
> > > +			[ $nextents -gt $max_nextents ] && break
> > > +		fi
> > > +		nr=$((nr+1))
> > > +	done
> > > +
> > > +	test -z "${missing}" && return
> > > +	seq 1 2 "${nr}" | while read d; do
> > > +		rm -rf "${name}/$(printf "%.08d" "$d")"
> > > +	done
> > > +}
> > > +
> > >  # Add a bunch of attrs to a file
> > >  __populate_create_attr() {
> > >  	name="$1"
> > > @@ -176,6 +207,7 @@ _scratch_xfs_populate() {
> > >  
> > >  	blksz="$(stat -f -c '%s' "${SCRATCH_MNT}")"
> > >  	dblksz="$(_xfs_get_dir_blocksize "$SCRATCH_MNT")"
> > > +	isize="$(_xfs_get_inode_size "$SCRATCH_MNT")"
> > >  	crc="$(_xfs_has_feature "$SCRATCH_MNT" crc -v)"
> > >  	if [ $crc -eq 1 ]; then
> > >  		leaf_hdr_size=64
> > > @@ -226,7 +258,7 @@ _scratch_xfs_populate() {
> > >  
> > >  	# - BTREE
> > >  	echo "+ btree dir"
> > > -	__populate_create_dir "${SCRATCH_MNT}/S_IFDIR.FMT_BTREE" "$((128 * dblksz / 40))" true
> > > +	__populate_xfs_create_btree_dir "${SCRATCH_MNT}/S_IFDIR.FMT_BTREE" "$isize" true
> > >  
> > >  	# Symlinks
> > >  	# - FMT_LOCAL
> > > diff --git a/common/xfs b/common/xfs
> > > index 674384a9..7aaa63c7 100644
> > > --- a/common/xfs
> > > +++ b/common/xfs
> > > @@ -1487,6 +1487,15 @@ _require_xfsrestore_xflag()
> > >  			_notrun 'xfsrestore does not support -x flag.'
> > >  }
> > >  
> > > +# Number of bytes reserved for a full inode record, which includes the
> > > +# immediate fork areas.
> > > +_xfs_get_inode_size()
> > > +{
> > > +	local mntpoint="$1"
> > > +
> > > +	$XFS_INFO_PROG "$mntpoint" | sed -n '/meta-data=.*isize/s/^.*isize=\([0-9]*\).*$/\1/p'
> > > +}
> > > +
> > >  # Number of bytes reserved for only the inode record, excluding the
> > >  # immediate fork areas.
> > >  _xfs_get_inode_core_bytes()
> > > -- 
> > > 2.18.4
> > > 
> > 
>
Zorro Lang Dec. 11, 2022, 11:01 a.m. UTC | #4
On Sat, Dec 10, 2022 at 06:30:10PM -0800, Darrick J. Wong wrote:
> On Sat, Dec 10, 2022 at 09:57:24PM +0800, Zorro Lang wrote:
> > On Fri, Dec 09, 2022 at 08:37:41AM -0800, Darrick J. Wong wrote:
> > > On Thu, Dec 08, 2022 at 03:28:43PM +0800, Ziyang Zhang wrote:
> > > > Sometimes "$((128 * dblksz / 40))" dirents cannot make sure that
> > > > S_IFDIR.FMT_BTREE could become btree format for its DATA fork.
> > > > 
> > > > Actually we just observed it can fail after apply our inode
> > > > extent-to-btree workaround. The root cause is that the kernel may be
> > > > too good at allocating consecutive blocks so that the data fork is
> > > > still in extents format.
> > > > 
> > > > Therefore instead of using a fixed number, let's make sure the number
> > > > of extents is large enough than (inode size - inode core size) /
> > > > sizeof(xfs_bmbt_rec_t).
> > > > 
> > > > Reviewed-by: Zorro Lang <zlang@redhat.com>
> > > > Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
> > > > Suggested-by: "Darrick J. Wong" <djwong@kernel.org>
> > > > Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> > > > Signed-off-by: Ziyang Zhang <ZiyangZhang@linux.alibaba.com>
> > > > ---
> > > >  common/populate | 34 +++++++++++++++++++++++++++++++++-
> > > >  common/xfs      |  9 +++++++++
> > > >  2 files changed, 42 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/common/populate b/common/populate
> > > > index 6e004997..0d334a13 100644
> > > > --- a/common/populate
> > > > +++ b/common/populate
> > > > @@ -71,6 +71,37 @@ __populate_create_dir() {
> > > >  	done
> > > >  }
> > > >  
> > > > +# Create a large directory and ensure that it's a btree format
> > > > +__populate_xfs_create_btree_dir() {
> > > > +	local name="$1"
> > > > +	local isize="$2"
> > > > +	local missing="$3"
> > > > +	local icore_size="$(_xfs_inode_core_bytes)"
> > > 
> > > Doesn't this helper require a path argument now?
> > 
> > What kind of "path" argument? I think he copy it from __populate_create_dir(),
> > and keep using the "name" as the root path to create files/dir.
> 
> This path argument, from
> https://lore.kernel.org/fstests/20221208072843.1866615-2-ZiyangZhang@linux.alibaba.com/T/#u
> 
> +# Number of bytes reserved for only the inode record, excluding the
> +# immediate fork areas.
> +_xfs_get_inode_core_bytes()
> +{
> +	local dir="$1"
> +
> +	if _xfs_has_feature "$dir" crc; then

Oh, sorry I thought you mean __populate_xfs_create_btree_dir() require a path
argument...

Yes, and this helper's name should be _xfs_get_inode_core_bytes(), not
_xfs_inode_core_bytes(). I didn't find _xfs_inode_core_bytes from current
fstests. So it should be:

  local icore_size="$(_xfs_get_inode_core_bytes $SCRATCH_MNT)"

Hi Ziyang,

Please modify this place, and check all other places which call
_xfs_get_inode_core_bytes and _xfs_get_inode_size, to make sure
they're all changed correctly.

Thanks,
Zorro


> 
> --D
> 
> > > 
> > > --D
> > > 
> > > > +	# We need enough extents to guarantee that the data fork is in
> > > > +	# btree format.  Cycling the mount to use xfs_db is too slow, so
> > > > +	# watch for when the extent count exceeds the space after the
> > > > +	# inode core.
> > > > +	local max_nextents="$(((isize - icore_size) / 16))"
> > > > +	local nr=0
> > > > +
> > > > +	mkdir -p "${name}"
> > > > +	while true; do
> > > > +		local creat=mkdir
> > > > +		test "$((nr % 20))" -eq 0 && creat=touch
> > > > +		$creat "${name}/$(printf "%.08d" "$nr")"
> > > > +		if [ "$((nr % 40))" -eq 0 ]; then
> > > > +			local nextents="$(_xfs_get_fsxattr nextents $name)"
> > > > +			[ $nextents -gt $max_nextents ] && break
> > > > +		fi
> > > > +		nr=$((nr+1))
> > > > +	done
> > > > +
> > > > +	test -z "${missing}" && return
> > > > +	seq 1 2 "${nr}" | while read d; do
> > > > +		rm -rf "${name}/$(printf "%.08d" "$d")"
> > > > +	done
> > > > +}
> > > > +
> > > >  # Add a bunch of attrs to a file
> > > >  __populate_create_attr() {
> > > >  	name="$1"
> > > > @@ -176,6 +207,7 @@ _scratch_xfs_populate() {
> > > >  
> > > >  	blksz="$(stat -f -c '%s' "${SCRATCH_MNT}")"
> > > >  	dblksz="$(_xfs_get_dir_blocksize "$SCRATCH_MNT")"
> > > > +	isize="$(_xfs_get_inode_size "$SCRATCH_MNT")"
> > > >  	crc="$(_xfs_has_feature "$SCRATCH_MNT" crc -v)"
> > > >  	if [ $crc -eq 1 ]; then
> > > >  		leaf_hdr_size=64
> > > > @@ -226,7 +258,7 @@ _scratch_xfs_populate() {
> > > >  
> > > >  	# - BTREE
> > > >  	echo "+ btree dir"
> > > > -	__populate_create_dir "${SCRATCH_MNT}/S_IFDIR.FMT_BTREE" "$((128 * dblksz / 40))" true
> > > > +	__populate_xfs_create_btree_dir "${SCRATCH_MNT}/S_IFDIR.FMT_BTREE" "$isize" true
> > > >  
> > > >  	# Symlinks
> > > >  	# - FMT_LOCAL
> > > > diff --git a/common/xfs b/common/xfs
> > > > index 674384a9..7aaa63c7 100644
> > > > --- a/common/xfs
> > > > +++ b/common/xfs
> > > > @@ -1487,6 +1487,15 @@ _require_xfsrestore_xflag()
> > > >  			_notrun 'xfsrestore does not support -x flag.'
> > > >  }
> > > >  
> > > > +# Number of bytes reserved for a full inode record, which includes the
> > > > +# immediate fork areas.
> > > > +_xfs_get_inode_size()
> > > > +{
> > > > +	local mntpoint="$1"
> > > > +
> > > > +	$XFS_INFO_PROG "$mntpoint" | sed -n '/meta-data=.*isize/s/^.*isize=\([0-9]*\).*$/\1/p'
> > > > +}
> > > > +
> > > >  # Number of bytes reserved for only the inode record, excluding the
> > > >  # immediate fork areas.
> > > >  _xfs_get_inode_core_bytes()
> > > > -- 
> > > > 2.18.4
> > > > 
> > > 
> > 
>
Zorro Lang Dec. 11, 2022, 11:23 a.m. UTC | #5
On Sun, Dec 11, 2022 at 07:01:25PM +0800, Zorro Lang wrote:
> On Sat, Dec 10, 2022 at 06:30:10PM -0800, Darrick J. Wong wrote:
> > On Sat, Dec 10, 2022 at 09:57:24PM +0800, Zorro Lang wrote:
> > > On Fri, Dec 09, 2022 at 08:37:41AM -0800, Darrick J. Wong wrote:
> > > > On Thu, Dec 08, 2022 at 03:28:43PM +0800, Ziyang Zhang wrote:
> > > > > Sometimes "$((128 * dblksz / 40))" dirents cannot make sure that
> > > > > S_IFDIR.FMT_BTREE could become btree format for its DATA fork.
> > > > > 
> > > > > Actually we just observed it can fail after apply our inode
> > > > > extent-to-btree workaround. The root cause is that the kernel may be
> > > > > too good at allocating consecutive blocks so that the data fork is
> > > > > still in extents format.
> > > > > 
> > > > > Therefore instead of using a fixed number, let's make sure the number
> > > > > of extents is large enough than (inode size - inode core size) /
> > > > > sizeof(xfs_bmbt_rec_t).
> > > > > 
> > > > > Reviewed-by: Zorro Lang <zlang@redhat.com>
> > > > > Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
> > > > > Suggested-by: "Darrick J. Wong" <djwong@kernel.org>
> > > > > Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> > > > > Signed-off-by: Ziyang Zhang <ZiyangZhang@linux.alibaba.com>
> > > > > ---
> > > > >  common/populate | 34 +++++++++++++++++++++++++++++++++-
> > > > >  common/xfs      |  9 +++++++++
> > > > >  2 files changed, 42 insertions(+), 1 deletion(-)
> > > > > 
> > > > > diff --git a/common/populate b/common/populate
> > > > > index 6e004997..0d334a13 100644
> > > > > --- a/common/populate
> > > > > +++ b/common/populate
> > > > > @@ -71,6 +71,37 @@ __populate_create_dir() {
> > > > >  	done
> > > > >  }
> > > > >  
> > > > > +# Create a large directory and ensure that it's a btree format
> > > > > +__populate_xfs_create_btree_dir() {
> > > > > +	local name="$1"
> > > > > +	local isize="$2"
> > > > > +	local missing="$3"
> > > > > +	local icore_size="$(_xfs_inode_core_bytes)"
> > > > 
> > > > Doesn't this helper require a path argument now?
> > > 
> > > What kind of "path" argument? I think he copy it from __populate_create_dir(),
> > > and keep using the "name" as the root path to create files/dir.
> > 
> > This path argument, from
> > https://lore.kernel.org/fstests/20221208072843.1866615-2-ZiyangZhang@linux.alibaba.com/T/#u
> > 
> > +# Number of bytes reserved for only the inode record, excluding the
> > +# immediate fork areas.
> > +_xfs_get_inode_core_bytes()
> > +{
> > +	local dir="$1"
> > +
> > +	if _xfs_has_feature "$dir" crc; then
> 
> Oh, sorry I thought you mean __populate_xfs_create_btree_dir() require a path
> argument...
> 
> Yes, and this helper's name should be _xfs_get_inode_core_bytes(), not
> _xfs_inode_core_bytes(). I didn't find _xfs_inode_core_bytes from current
> fstests. So it should be:
> 
>   local icore_size="$(_xfs_get_inode_core_bytes $SCRATCH_MNT)"

Hmm... wait a moment. The work directory of __populate_xfs_create_btree_dir() is
"local name=$1", but the "$name" maybe not a mountpoint.

So we need to get the fs dev/mnt before calling _xfs_get_inode_core_bytes:

	local icore_size="$(_xfs_get_inode_core_bytes `_df_dir $name | awk '{print $1}'`)"

Or use $TEST_MNT directly?

Thanks,
Zorro

> 
> Hi Ziyang,
> 
> Please modify this place, and check all other places which call
> _xfs_get_inode_core_bytes and _xfs_get_inode_size, to make sure
> they're all changed correctly.
> 
> Thanks,
> Zorro
> 
> 
> > 
> > --D
> > 
> > > > 
> > > > --D
> > > > 
> > > > > +	# We need enough extents to guarantee that the data fork is in
> > > > > +	# btree format.  Cycling the mount to use xfs_db is too slow, so
> > > > > +	# watch for when the extent count exceeds the space after the
> > > > > +	# inode core.
> > > > > +	local max_nextents="$(((isize - icore_size) / 16))"
> > > > > +	local nr=0
> > > > > +
> > > > > +	mkdir -p "${name}"
> > > > > +	while true; do
> > > > > +		local creat=mkdir
> > > > > +		test "$((nr % 20))" -eq 0 && creat=touch
> > > > > +		$creat "${name}/$(printf "%.08d" "$nr")"
> > > > > +		if [ "$((nr % 40))" -eq 0 ]; then
> > > > > +			local nextents="$(_xfs_get_fsxattr nextents $name)"
> > > > > +			[ $nextents -gt $max_nextents ] && break
> > > > > +		fi
> > > > > +		nr=$((nr+1))
> > > > > +	done
> > > > > +
> > > > > +	test -z "${missing}" && return
> > > > > +	seq 1 2 "${nr}" | while read d; do
> > > > > +		rm -rf "${name}/$(printf "%.08d" "$d")"
> > > > > +	done
> > > > > +}
> > > > > +
> > > > >  # Add a bunch of attrs to a file
> > > > >  __populate_create_attr() {
> > > > >  	name="$1"
> > > > > @@ -176,6 +207,7 @@ _scratch_xfs_populate() {
> > > > >  
> > > > >  	blksz="$(stat -f -c '%s' "${SCRATCH_MNT}")"
> > > > >  	dblksz="$(_xfs_get_dir_blocksize "$SCRATCH_MNT")"
> > > > > +	isize="$(_xfs_get_inode_size "$SCRATCH_MNT")"
> > > > >  	crc="$(_xfs_has_feature "$SCRATCH_MNT" crc -v)"
> > > > >  	if [ $crc -eq 1 ]; then
> > > > >  		leaf_hdr_size=64
> > > > > @@ -226,7 +258,7 @@ _scratch_xfs_populate() {
> > > > >  
> > > > >  	# - BTREE
> > > > >  	echo "+ btree dir"
> > > > > -	__populate_create_dir "${SCRATCH_MNT}/S_IFDIR.FMT_BTREE" "$((128 * dblksz / 40))" true
> > > > > +	__populate_xfs_create_btree_dir "${SCRATCH_MNT}/S_IFDIR.FMT_BTREE" "$isize" true
> > > > >  
> > > > >  	# Symlinks
> > > > >  	# - FMT_LOCAL
> > > > > diff --git a/common/xfs b/common/xfs
> > > > > index 674384a9..7aaa63c7 100644
> > > > > --- a/common/xfs
> > > > > +++ b/common/xfs
> > > > > @@ -1487,6 +1487,15 @@ _require_xfsrestore_xflag()
> > > > >  			_notrun 'xfsrestore does not support -x flag.'
> > > > >  }
> > > > >  
> > > > > +# Number of bytes reserved for a full inode record, which includes the
> > > > > +# immediate fork areas.
> > > > > +_xfs_get_inode_size()
> > > > > +{
> > > > > +	local mntpoint="$1"
> > > > > +
> > > > > +	$XFS_INFO_PROG "$mntpoint" | sed -n '/meta-data=.*isize/s/^.*isize=\([0-9]*\).*$/\1/p'
> > > > > +}
> > > > > +
> > > > >  # Number of bytes reserved for only the inode record, excluding the
> > > > >  # immediate fork areas.
> > > > >  _xfs_get_inode_core_bytes()
> > > > > -- 
> > > > > 2.18.4
> > > > > 
> > > > 
> > > 
> >
Darrick J. Wong Dec. 11, 2022, 7:19 p.m. UTC | #6
On Sun, Dec 11, 2022 at 07:23:31PM +0800, Zorro Lang wrote:
> On Sun, Dec 11, 2022 at 07:01:25PM +0800, Zorro Lang wrote:
> > On Sat, Dec 10, 2022 at 06:30:10PM -0800, Darrick J. Wong wrote:
> > > On Sat, Dec 10, 2022 at 09:57:24PM +0800, Zorro Lang wrote:
> > > > On Fri, Dec 09, 2022 at 08:37:41AM -0800, Darrick J. Wong wrote:
> > > > > On Thu, Dec 08, 2022 at 03:28:43PM +0800, Ziyang Zhang wrote:
> > > > > > Sometimes "$((128 * dblksz / 40))" dirents cannot make sure that
> > > > > > S_IFDIR.FMT_BTREE could become btree format for its DATA fork.
> > > > > > 
> > > > > > Actually we just observed it can fail after apply our inode
> > > > > > extent-to-btree workaround. The root cause is that the kernel may be
> > > > > > too good at allocating consecutive blocks so that the data fork is
> > > > > > still in extents format.
> > > > > > 
> > > > > > Therefore instead of using a fixed number, let's make sure the number
> > > > > > of extents is large enough than (inode size - inode core size) /
> > > > > > sizeof(xfs_bmbt_rec_t).
> > > > > > 
> > > > > > Reviewed-by: Zorro Lang <zlang@redhat.com>
> > > > > > Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
> > > > > > Suggested-by: "Darrick J. Wong" <djwong@kernel.org>
> > > > > > Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> > > > > > Signed-off-by: Ziyang Zhang <ZiyangZhang@linux.alibaba.com>
> > > > > > ---
> > > > > >  common/populate | 34 +++++++++++++++++++++++++++++++++-
> > > > > >  common/xfs      |  9 +++++++++
> > > > > >  2 files changed, 42 insertions(+), 1 deletion(-)
> > > > > > 
> > > > > > diff --git a/common/populate b/common/populate
> > > > > > index 6e004997..0d334a13 100644
> > > > > > --- a/common/populate
> > > > > > +++ b/common/populate
> > > > > > @@ -71,6 +71,37 @@ __populate_create_dir() {
> > > > > >  	done
> > > > > >  }
> > > > > >  
> > > > > > +# Create a large directory and ensure that it's a btree format
> > > > > > +__populate_xfs_create_btree_dir() {
> > > > > > +	local name="$1"
> > > > > > +	local isize="$2"
> > > > > > +	local missing="$3"
> > > > > > +	local icore_size="$(_xfs_inode_core_bytes)"
> > > > > 
> > > > > Doesn't this helper require a path argument now?
> > > > 
> > > > What kind of "path" argument? I think he copy it from __populate_create_dir(),
> > > > and keep using the "name" as the root path to create files/dir.
> > > 
> > > This path argument, from
> > > https://lore.kernel.org/fstests/20221208072843.1866615-2-ZiyangZhang@linux.alibaba.com/T/#u
> > > 
> > > +# Number of bytes reserved for only the inode record, excluding the
> > > +# immediate fork areas.
> > > +_xfs_get_inode_core_bytes()
> > > +{
> > > +	local dir="$1"
> > > +
> > > +	if _xfs_has_feature "$dir" crc; then
> > 
> > Oh, sorry I thought you mean __populate_xfs_create_btree_dir() require a path
> > argument...
> > 
> > Yes, and this helper's name should be _xfs_get_inode_core_bytes(), not
> > _xfs_inode_core_bytes(). I didn't find _xfs_inode_core_bytes from current
> > fstests. So it should be:
> > 
> >   local icore_size="$(_xfs_get_inode_core_bytes $SCRATCH_MNT)"
> 
> Hmm... wait a moment. The work directory of __populate_xfs_create_btree_dir() is
> "local name=$1", but the "$name" maybe not a mountpoint.
> 
> So we need to get the fs dev/mnt before calling _xfs_get_inode_core_bytes:
> 
> 	local icore_size="$(_xfs_get_inode_core_bytes `_df_dir $name | awk '{print $1}'`)"
> 
> Or use $TEST_MNT directly?

The __populate* functions only modify the scratch filesystem, so your
original suggestion is correct:

	local icore_size="$(_xfs_get_inode_core_bytes $SCRATCH_MNT)"

--D

> Thanks,
> Zorro
> 
> > 
> > Hi Ziyang,
> > 
> > Please modify this place, and check all other places which call
> > _xfs_get_inode_core_bytes and _xfs_get_inode_size, to make sure
> > they're all changed correctly.
> > 
> > Thanks,
> > Zorro
> > 
> > 
> > > 
> > > --D
> > > 
> > > > > 
> > > > > --D
> > > > > 
> > > > > > +	# We need enough extents to guarantee that the data fork is in
> > > > > > +	# btree format.  Cycling the mount to use xfs_db is too slow, so
> > > > > > +	# watch for when the extent count exceeds the space after the
> > > > > > +	# inode core.
> > > > > > +	local max_nextents="$(((isize - icore_size) / 16))"
> > > > > > +	local nr=0
> > > > > > +
> > > > > > +	mkdir -p "${name}"
> > > > > > +	while true; do
> > > > > > +		local creat=mkdir
> > > > > > +		test "$((nr % 20))" -eq 0 && creat=touch
> > > > > > +		$creat "${name}/$(printf "%.08d" "$nr")"
> > > > > > +		if [ "$((nr % 40))" -eq 0 ]; then
> > > > > > +			local nextents="$(_xfs_get_fsxattr nextents $name)"
> > > > > > +			[ $nextents -gt $max_nextents ] && break
> > > > > > +		fi
> > > > > > +		nr=$((nr+1))
> > > > > > +	done
> > > > > > +
> > > > > > +	test -z "${missing}" && return
> > > > > > +	seq 1 2 "${nr}" | while read d; do
> > > > > > +		rm -rf "${name}/$(printf "%.08d" "$d")"
> > > > > > +	done
> > > > > > +}
> > > > > > +
> > > > > >  # Add a bunch of attrs to a file
> > > > > >  __populate_create_attr() {
> > > > > >  	name="$1"
> > > > > > @@ -176,6 +207,7 @@ _scratch_xfs_populate() {
> > > > > >  
> > > > > >  	blksz="$(stat -f -c '%s' "${SCRATCH_MNT}")"
> > > > > >  	dblksz="$(_xfs_get_dir_blocksize "$SCRATCH_MNT")"
> > > > > > +	isize="$(_xfs_get_inode_size "$SCRATCH_MNT")"
> > > > > >  	crc="$(_xfs_has_feature "$SCRATCH_MNT" crc -v)"
> > > > > >  	if [ $crc -eq 1 ]; then
> > > > > >  		leaf_hdr_size=64
> > > > > > @@ -226,7 +258,7 @@ _scratch_xfs_populate() {
> > > > > >  
> > > > > >  	# - BTREE
> > > > > >  	echo "+ btree dir"
> > > > > > -	__populate_create_dir "${SCRATCH_MNT}/S_IFDIR.FMT_BTREE" "$((128 * dblksz / 40))" true
> > > > > > +	__populate_xfs_create_btree_dir "${SCRATCH_MNT}/S_IFDIR.FMT_BTREE" "$isize" true
> > > > > >  
> > > > > >  	# Symlinks
> > > > > >  	# - FMT_LOCAL
> > > > > > diff --git a/common/xfs b/common/xfs
> > > > > > index 674384a9..7aaa63c7 100644
> > > > > > --- a/common/xfs
> > > > > > +++ b/common/xfs
> > > > > > @@ -1487,6 +1487,15 @@ _require_xfsrestore_xflag()
> > > > > >  			_notrun 'xfsrestore does not support -x flag.'
> > > > > >  }
> > > > > >  
> > > > > > +# Number of bytes reserved for a full inode record, which includes the
> > > > > > +# immediate fork areas.
> > > > > > +_xfs_get_inode_size()
> > > > > > +{
> > > > > > +	local mntpoint="$1"
> > > > > > +
> > > > > > +	$XFS_INFO_PROG "$mntpoint" | sed -n '/meta-data=.*isize/s/^.*isize=\([0-9]*\).*$/\1/p'
> > > > > > +}
> > > > > > +
> > > > > >  # Number of bytes reserved for only the inode record, excluding the
> > > > > >  # immediate fork areas.
> > > > > >  _xfs_get_inode_core_bytes()
> > > > > > -- 
> > > > > > 2.18.4
> > > > > > 
> > > > > 
> > > > 
> > > 
>
Ziyang Zhang Dec. 12, 2022, 3:43 a.m. UTC | #7
On 2022/12/11 19:01, Zorro Lang wrote:
> Yes, and this helper's name should be _xfs_get_inode_core_bytes(), not
> _xfs_inode_core_bytes(). I didn't find _xfs_inode_core_bytes from current
> fstests. So it should be:
> 
>   local icore_size="$(_xfs_get_inode_core_bytes $SCRATCH_MNT)"
> 
> Hi Ziyang,
> 
> Please modify this place, and check all other places which call
> _xfs_get_inode_core_bytes and _xfs_get_inode_size, to make sure
> they're all changed correctly.
> 
> Thanks,
> Zorro

Hi Zorro,

You are correct. Sorry for my mistake.

I should use _xfs_get_inode_core_bytes here and add a path
argument($SCRATCH_MNT). I will send a new version.

Regards,
Zhang
diff mbox series

Patch

diff --git a/common/populate b/common/populate
index 6e004997..0d334a13 100644
--- a/common/populate
+++ b/common/populate
@@ -71,6 +71,37 @@  __populate_create_dir() {
 	done
 }
 
+# Create a large directory and ensure that it's a btree format
+__populate_xfs_create_btree_dir() {
+	local name="$1"
+	local isize="$2"
+	local missing="$3"
+	local icore_size="$(_xfs_inode_core_bytes)"
+	# We need enough extents to guarantee that the data fork is in
+	# btree format.  Cycling the mount to use xfs_db is too slow, so
+	# watch for when the extent count exceeds the space after the
+	# inode core.
+	local max_nextents="$(((isize - icore_size) / 16))"
+	local nr=0
+
+	mkdir -p "${name}"
+	while true; do
+		local creat=mkdir
+		test "$((nr % 20))" -eq 0 && creat=touch
+		$creat "${name}/$(printf "%.08d" "$nr")"
+		if [ "$((nr % 40))" -eq 0 ]; then
+			local nextents="$(_xfs_get_fsxattr nextents $name)"
+			[ $nextents -gt $max_nextents ] && break
+		fi
+		nr=$((nr+1))
+	done
+
+	test -z "${missing}" && return
+	seq 1 2 "${nr}" | while read d; do
+		rm -rf "${name}/$(printf "%.08d" "$d")"
+	done
+}
+
 # Add a bunch of attrs to a file
 __populate_create_attr() {
 	name="$1"
@@ -176,6 +207,7 @@  _scratch_xfs_populate() {
 
 	blksz="$(stat -f -c '%s' "${SCRATCH_MNT}")"
 	dblksz="$(_xfs_get_dir_blocksize "$SCRATCH_MNT")"
+	isize="$(_xfs_get_inode_size "$SCRATCH_MNT")"
 	crc="$(_xfs_has_feature "$SCRATCH_MNT" crc -v)"
 	if [ $crc -eq 1 ]; then
 		leaf_hdr_size=64
@@ -226,7 +258,7 @@  _scratch_xfs_populate() {
 
 	# - BTREE
 	echo "+ btree dir"
-	__populate_create_dir "${SCRATCH_MNT}/S_IFDIR.FMT_BTREE" "$((128 * dblksz / 40))" true
+	__populate_xfs_create_btree_dir "${SCRATCH_MNT}/S_IFDIR.FMT_BTREE" "$isize" true
 
 	# Symlinks
 	# - FMT_LOCAL
diff --git a/common/xfs b/common/xfs
index 674384a9..7aaa63c7 100644
--- a/common/xfs
+++ b/common/xfs
@@ -1487,6 +1487,15 @@  _require_xfsrestore_xflag()
 			_notrun 'xfsrestore does not support -x flag.'
 }
 
+# Number of bytes reserved for a full inode record, which includes the
+# immediate fork areas.
+_xfs_get_inode_size()
+{
+	local mntpoint="$1"
+
+	$XFS_INFO_PROG "$mntpoint" | sed -n '/meta-data=.*isize/s/^.*isize=\([0-9]*\).*$/\1/p'
+}
+
 # Number of bytes reserved for only the inode record, excluding the
 # immediate fork areas.
 _xfs_get_inode_core_bytes()