diff mbox series

[V6,2/2] common/populate: Ensure that S_IFDIR.FMT_BTREE is in btree format

Message ID 20221212055645.2067020-3-ZiyangZhang@linux.alibaba.com (mailing list archive)
State New, archived
Headers show
Series cleanup and bugfix for xfs tests related to btree format | expand

Commit Message

Ziyang Zhang Dec. 12, 2022, 5:56 a.m. UTC
Sometimes "$((128 * dblksz / 40))" dirents cannot make sure that
S_IFDIR.FMT_BTREE could become btree format for its DATA fork.

Actually, we just observed that it can fail after applying our inode
extent-to-btree workaround. The root cause is that the kernel may be
too good at allocating consecutive blocks so that the data fork is
still in extents format.

Therefore, instead of using a fixed number, let's make sure the number
of extents is larger than (inode size - inode core size) /
sizeof(xfs_bmbt_rec_t).

Reviewed-by: Zorro Lang <zlang@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Suggested-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Signed-off-by: Ziyang Zhang <ZiyangZhang@linux.alibaba.com>
---
 common/populate | 34 +++++++++++++++++++++++++++++++++-
 common/xfs      |  9 +++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)

Comments

Yang Xu (Fujitsu) Jan. 3, 2023, 6:54 a.m. UTC | #1
on 2022/12/12 13:56, Ziyang Zhang wrote:

> Sometimes "$((128 * dblksz / 40))" dirents cannot make sure that
> S_IFDIR.FMT_BTREE could become btree format for its DATA fork.
> 
> Actually we just observed it can fail after apply our inode
> extent-to-btree workaround. The root cause is that the kernel may be
> too good at allocating consecutive blocks so that the data fork is
> still in extents format.
> 
> Therefore instead of using a fixed number, let's make sure the number
> of extents is large enough than (inode size - inode core size) /
> sizeof(xfs_bmbt_rec_t).

After this patch, xfs/083 and xfs/155 failed on my environment (6.1.0+
kernel).

the 083 fail as below:
1 fuzzing xfs with FUZZ_ARGS=-3 -n 32 and FSCK_PASSES=10
   2 + create scratch fs
   3 meta-data=/dev/sdb9              isize=512    agcount=4, 
agsize=529878 blks
   4          =                       sectsz=512   attr=2, projid32bit=1
   5          =                       crc=1        finobt=1, sparse=1, 
rmapbt=0
   6          =                       reflink=0    bigtime=1 
inobtcount=1 nrext64=0
   7 data     =                       bsize=4096   blocks=2119510, 
imaxpct=25
   8          =                       sunit=0      swidth=0 blks
   9 naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
  10 log      =internal log           bsize=4096   blocks=16384, version=2
  11          =                       sectsz=512   sunit=0 blks, 
lazy-count=1
  12 realtime =none                   extsz=4096   blocks=0, rtextents=0
  13 + populate fs image
  14 MOUNT_OPTIONS =  -o usrquota,grpquota,prjquota
  15 + fill root ino chunk
  16 + extents file
  17 wrote 4096/4096 bytes at offset 0
  18 4 KiB, 1 ops; 0.0187 sec (212.891 KiB/sec and 53.2226 ops/sec)
  19 + btree extents file
  20 wrote 2097152/2097152 bytes at offset 0
  21 2 MiB, 2 ops; 0.0637 sec (31.370 MiB/sec and 31.3701 ops/sec)
  22 + inline dir
  23 + block dir
  24 + leaf dir
  25 + leafn dir
  26 + node dir
  27 + btree dir
  28 + inline symlink
  29 + extents symlink
  30 + special
  31 + local attr
  32 + leaf attr
  33 + node attr
  34 + btree attr
  35 + attr extents with a remote less-than-a-block value
  36 + attr extents with a remote one-block value
  37 + empty file
  38 + freesp btree
  39 wrote 4194304/4194304 bytes at offset 0
  40 4 MiB, 4 ops; 0.0941 sec (42.470 MiB/sec and 42.4696 ops/sec)
  41 + inobt btree
  42 + real files
  43 FILL FS
  44 src_sz 2052 fs_sz 8342940 nr 203
  45 failed to create ino 8578 dformat expected btree saw extents
  46 failed to create ino 8578 dformat expected btree saw extents
  47 (see /var/lib/xfstests/results//xfs/083.full for details)


It seems this logic can't ensure the creation of a btree format dir and
it ends up as an extent format dir. Or am I missing something?


Best Regards
Yang Xu


> 
> Reviewed-by: Zorro Lang <zlang@redhat.com>
> Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
> Suggested-by: "Darrick J. Wong" <djwong@kernel.org>
> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> Signed-off-by: Ziyang Zhang <ZiyangZhang@linux.alibaba.com>
> ---
>   common/populate | 34 +++++++++++++++++++++++++++++++++-
>   common/xfs      |  9 +++++++++
>   2 files changed, 42 insertions(+), 1 deletion(-)
> 
> diff --git a/common/populate b/common/populate
> index 6e004997..8f7f2113 100644
> --- a/common/populate
> +++ b/common/populate
> @@ -71,6 +71,37 @@ __populate_create_dir() {
>   	done
>   }
>   
> +# Create a large directory and ensure that it's a btree format
> +__populate_xfs_create_btree_dir() {
> +	local name="$1"
> +	local isize="$2"
> +	local missing="$3"
> +	local icore_size="$(_xfs_get_inode_core_bytes $SCRATCH_MNT)"
> +	# We need enough extents to guarantee that the data fork is in
> +	# btree format.  Cycling the mount to use xfs_db is too slow, so
> +	# watch for when the extent count exceeds the space after the
> +	# inode core.
> +	local max_nextents="$(((isize - icore_size) / 16))"
> +	local nr=0
> +
> +	mkdir -p "${name}"
> +	while true; do
> +		local creat=mkdir
> +		test "$((nr % 20))" -eq 0 && creat=touch
> +		$creat "${name}/$(printf "%.08d" "$nr")"
> +		if [ "$((nr % 40))" -eq 0 ]; then
> +			local nextents="$(_xfs_get_fsxattr nextents $name)"
> +			[ $nextents -gt $max_nextents ] && break
> +		fi
> +		nr=$((nr+1))
> +	done
> +
> +	test -z "${missing}" && return
> +	seq 1 2 "${nr}" | while read d; do
> +		rm -rf "${name}/$(printf "%.08d" "$d")"
> +	done
> +}
> +
>   # Add a bunch of attrs to a file
>   __populate_create_attr() {
>   	name="$1"
> @@ -176,6 +207,7 @@ _scratch_xfs_populate() {
>   
>   	blksz="$(stat -f -c '%s' "${SCRATCH_MNT}")"
>   	dblksz="$(_xfs_get_dir_blocksize "$SCRATCH_MNT")"
> +	isize="$(_xfs_get_inode_size "$SCRATCH_MNT")"
>   	crc="$(_xfs_has_feature "$SCRATCH_MNT" crc -v)"
>   	if [ $crc -eq 1 ]; then
>   		leaf_hdr_size=64
> @@ -226,7 +258,7 @@ _scratch_xfs_populate() {
>   
>   	# - BTREE
>   	echo "+ btree dir"
> -	__populate_create_dir "${SCRATCH_MNT}/S_IFDIR.FMT_BTREE" "$((128 * dblksz / 40))" true
> +	__populate_xfs_create_btree_dir "${SCRATCH_MNT}/S_IFDIR.FMT_BTREE" "$isize" true
>   
>   	# Symlinks
>   	# - FMT_LOCAL
> diff --git a/common/xfs b/common/xfs
> index 674384a9..7aaa63c7 100644
> --- a/common/xfs
> +++ b/common/xfs
> @@ -1487,6 +1487,15 @@ _require_xfsrestore_xflag()
>   			_notrun 'xfsrestore does not support -x flag.'
>   }
>   
> +# Number of bytes reserved for a full inode record, which includes the
> +# immediate fork areas.
> +_xfs_get_inode_size()
> +{
> +	local mntpoint="$1"
> +
> +	$XFS_INFO_PROG "$mntpoint" | sed -n '/meta-data=.*isize/s/^.*isize=\([0-9]*\).*$/\1/p'
> +}
> +
>   # Number of bytes reserved for only the inode record, excluding the
>   # immediate fork areas.
>   _xfs_get_inode_core_bytes()
Ziyang Zhang Jan. 3, 2023, 9:29 a.m. UTC | #2
On 2023/1/3 14:54, xuyang2018.jy@fujitsu.com wrote:
> 
> 
> on 2022/12/12 13:56, Ziyang Zhang wrote:
> 
>> Sometimes "$((128 * dblksz / 40))" dirents cannot make sure that
>> S_IFDIR.FMT_BTREE could become btree format for its DATA fork.
>>
>> Actually we just observed it can fail after apply our inode
>> extent-to-btree workaround. The root cause is that the kernel may be
>> too good at allocating consecutive blocks so that the data fork is
>> still in extents format.
>>
>> Therefore instead of using a fixed number, let's make sure the number
>> of extents is large enough than (inode size - inode core size) /
>> sizeof(xfs_bmbt_rec_t).
> 
> After this patch, xfs/083 and xfs/155 failed on my envrionment(6.1.0+ 
> kernel).
> 
> the 083 fail as below:
> 1 fuzzing xfs with FUZZ_ARGS=-3 -n 32 and FSCK_PASSES=10
>    2 + create scratch fs
>    3 meta-data=/dev/sdb9              isize=512    agcount=4, 
> agsize=529878 blks
>    4          =                       sectsz=512   attr=2, projid32bit=1
>    5          =                       crc=1        finobt=1, sparse=1, 
> rmapbt=0
>    6          =                       reflink=0    bigtime=1 
> inobtcount=1 nrext64=0
>    7 data     =                       bsize=4096   blocks=2119510, 
> imaxpct=25
>    8          =                       sunit=0      swidth=0 blks
>    9 naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
>   10 log      =internal log           bsize=4096   blocks=16384, version=2
>   11          =                       sectsz=512   sunit=0 blks, 
> lazy-count=1
>   12 realtime =none                   extsz=4096   blocks=0, rtextents=0
>   13 + populate fs image
>   14 MOUNT_OPTIONS =  -o usrquota,grpquota,prjquota
>   15 + fill root ino chunk
>   16 + extents file
>   17 wrote 4096/4096 bytes at offset 0
>   18 4 KiB, 1 ops; 0.0187 sec (212.891 KiB/sec and 53.2226 ops/sec)
>   19 + btree extents file
>   20 wrote 2097152/2097152 bytes at offset 0
>   21 2 MiB, 2 ops; 0.0637 sec (31.370 MiB/sec and 31.3701 ops/sec)
>   22 + inline dir
>   23 + block dir
>   24 + leaf dir
>   25 + leafn dir
>   26 + node dir
>   27 + btree dir
>   28 + inline symlink
>   29 + extents symlink
>   30 + special
>   31 + local attr
>   32 + leaf attr
>   33 + node attr
>   34 + btree attr
>   35 + attr extents with a remote less-than-a-block value
>   36 + attr extents with a remote one-block value
>   37 + empty file
>   38 + freesp btree
>   39 wrote 4194304/4194304 bytes at offset 0
>   40 4 MiB, 4 ops; 0.0941 sec (42.470 MiB/sec and 42.4696 ops/sec)
>   41 + inobt btree
>   42 + real files
>   43 FILL FS
>   44 src_sz 2052 fs_sz 8342940 nr 203
>   45 failed to create ino 8578 dformat expected btree saw extents
>   46 failed to create ino 8578 dformat expected btree saw extents
>   47 (see /var/lib/xfstests/results//xfs/083.full for details)
> 
> 
> It seems this logic can't ensure to creat a btree format dir and it
> is a  extent format dir. Or I miss something?
> 
> 
> Best Regards
> Yang Xu

Hi Yang,

Looks like xfs/083 does call __populate_xfs_create_btree_dir.
Could you please share your test environment and config in
detail and I will try to reproduce your report.

Regards,
Zhang
Yang Xu (Fujitsu) Jan. 3, 2023, 9:58 a.m. UTC | #3
on  2023/01/03 17:29, Ziyang Zhang wrote
> On 2023/1/3 14:54, xuyang2018.jy@fujitsu.com wrote:
>>
>>
>> on 2022/12/12 13:56, Ziyang Zhang wrote:
>>
>>> Sometimes "$((128 * dblksz / 40))" dirents cannot make sure that
>>> S_IFDIR.FMT_BTREE could become btree format for its DATA fork.
>>>
>>> Actually we just observed it can fail after apply our inode
>>> extent-to-btree workaround. The root cause is that the kernel may be
>>> too good at allocating consecutive blocks so that the data fork is
>>> still in extents format.
>>>
>>> Therefore instead of using a fixed number, let's make sure the number
>>> of extents is large enough than (inode size - inode core size) /
>>> sizeof(xfs_bmbt_rec_t).
>>
>> After this patch, xfs/083 and xfs/155 failed on my envrionment(6.1.0+
>> kernel).
>>
>> the 083 fail as below:
>> 1 fuzzing xfs with FUZZ_ARGS=-3 -n 32 and FSCK_PASSES=10
>>     2 + create scratch fs
>>     3 meta-data=/dev/sdb9              isize=512    agcount=4,
>> agsize=529878 blks
>>     4          =                       sectsz=512   attr=2, projid32bit=1
>>     5          =                       crc=1        finobt=1, sparse=1,
>> rmapbt=0
>>     6          =                       reflink=0    bigtime=1
>> inobtcount=1 nrext64=0
>>     7 data     =                       bsize=4096   blocks=2119510,
>> imaxpct=25
>>     8          =                       sunit=0      swidth=0 blks
>>     9 naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
>>    10 log      =internal log           bsize=4096   blocks=16384, version=2
>>    11          =                       sectsz=512   sunit=0 blks,
>> lazy-count=1
>>    12 realtime =none                   extsz=4096   blocks=0, rtextents=0
>>    13 + populate fs image
>>    14 MOUNT_OPTIONS =  -o usrquota,grpquota,prjquota
>>    15 + fill root ino chunk
>>    16 + extents file
>>    17 wrote 4096/4096 bytes at offset 0
>>    18 4 KiB, 1 ops; 0.0187 sec (212.891 KiB/sec and 53.2226 ops/sec)
>>    19 + btree extents file
>>    20 wrote 2097152/2097152 bytes at offset 0
>>    21 2 MiB, 2 ops; 0.0637 sec (31.370 MiB/sec and 31.3701 ops/sec)
>>    22 + inline dir
>>    23 + block dir
>>    24 + leaf dir
>>    25 + leafn dir
>>    26 + node dir
>>    27 + btree dir
>>    28 + inline symlink
>>    29 + extents symlink
>>    30 + special
>>    31 + local attr
>>    32 + leaf attr
>>    33 + node attr
>>    34 + btree attr
>>    35 + attr extents with a remote less-than-a-block value
>>    36 + attr extents with a remote one-block value
>>    37 + empty file
>>    38 + freesp btree
>>    39 wrote 4194304/4194304 bytes at offset 0
>>    40 4 MiB, 4 ops; 0.0941 sec (42.470 MiB/sec and 42.4696 ops/sec)
>>    41 + inobt btree
>>    42 + real files
>>    43 FILL FS
>>    44 src_sz 2052 fs_sz 8342940 nr 203
>>    45 failed to create ino 8578 dformat expected btree saw extents
>>    46 failed to create ino 8578 dformat expected btree saw extents
>>    47 (see /var/lib/xfstests/results//xfs/083.full for details)
>>
>>
>> It seems this logic can't ensure to creat a btree format dir and it
>> is a  extent format dir. Or I miss something?
>>
>>
>> Best Regards
>> Yang Xu
> 
> Hi Yang,
> 
> Looks like xfs/083 does call __populate_xfs_create_btree_dir.

Yes.
> Could you please share your test environment and config in
> detail and I will try to reproduce your report.

Of course. I use Fedora 31 and a 6.1 kernel. xfsprogs is the upstream
version: Release v6.0.0.

local.config
export TEST_DEV=/dev/sdb8
export TEST_DIR=/mnt/xfstests/test
export SCRATCH_DEV=/dev/sdb9
export SCRATCH_MNT=/mnt/xfstests/scratch
export XFS_MKFS_OPTIONS="-b size=4096 -m reflink=1"


disk info:
/dev/sdb8       566241280 608184319  41943040    20G 83 Linux
/dev/sdb9       608186368 625142447  16956080   8.1G 83 Linux


BTW, xfs/273 and xfs/495, which call _scratch_populate_cached, also
failed with this commit under selinux Permissive status and passed
under selinux Enforcing status. I guess the extended attr fork was
filled when selinux was enabled, so we can create a btree dir with a
smaller number of files.

Best Regards
Yang Xu
> 
> Regards,
> Zhang
>
Ziyang Zhang Jan. 6, 2023, 9:55 a.m. UTC | #4
On 2023/1/3 17:58, xuyang2018.jy@fujitsu.com wrote:
> on  2023/01/03 17:29, Ziyang Zhang wrote
>> On 2023/1/3 14:54, xuyang2018.jy@fujitsu.com wrote:
>>>
>>>
>>> on 2022/12/12 13:56, Ziyang Zhang wrote:
>>>
>>>> Sometimes "$((128 * dblksz / 40))" dirents cannot make sure that
>>>> S_IFDIR.FMT_BTREE could become btree format for its DATA fork.
>>>>
>>>> Actually we just observed it can fail after apply our inode
>>>> extent-to-btree workaround. The root cause is that the kernel may be
>>>> too good at allocating consecutive blocks so that the data fork is
>>>> still in extents format.
>>>>
>>>> Therefore instead of using a fixed number, let's make sure the number
>>>> of extents is large enough than (inode size - inode core size) /
>>>> sizeof(xfs_bmbt_rec_t).
>>>
>>> After this patch, xfs/083 and xfs/155 failed on my envrionment(6.1.0+
>>> kernel).
>>>
>>> the 083 fail as below:
>>> 1 fuzzing xfs with FUZZ_ARGS=-3 -n 32 and FSCK_PASSES=10
>>>     2 + create scratch fs
>>>     3 meta-data=/dev/sdb9              isize=512    agcount=4,
>>> agsize=529878 blks
>>>     4          =                       sectsz=512   attr=2, projid32bit=1
>>>     5          =                       crc=1        finobt=1, sparse=1,
>>> rmapbt=0
>>>     6          =                       reflink=0    bigtime=1
>>> inobtcount=1 nrext64=0
>>>     7 data     =                       bsize=4096   blocks=2119510,
>>> imaxpct=25
>>>     8          =                       sunit=0      swidth=0 blks
>>>     9 naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
>>>    10 log      =internal log           bsize=4096   blocks=16384, version=2
>>>    11          =                       sectsz=512   sunit=0 blks,
>>> lazy-count=1
>>>    12 realtime =none                   extsz=4096   blocks=0, rtextents=0
>>>    13 + populate fs image
>>>    14 MOUNT_OPTIONS =  -o usrquota,grpquota,prjquota
>>>    15 + fill root ino chunk
>>>    16 + extents file
>>>    17 wrote 4096/4096 bytes at offset 0
>>>    18 4 KiB, 1 ops; 0.0187 sec (212.891 KiB/sec and 53.2226 ops/sec)
>>>    19 + btree extents file
>>>    20 wrote 2097152/2097152 bytes at offset 0
>>>    21 2 MiB, 2 ops; 0.0637 sec (31.370 MiB/sec and 31.3701 ops/sec)
>>>    22 + inline dir
>>>    23 + block dir
>>>    24 + leaf dir
>>>    25 + leafn dir
>>>    26 + node dir
>>>    27 + btree dir
>>>    28 + inline symlink
>>>    29 + extents symlink
>>>    30 + special
>>>    31 + local attr
>>>    32 + leaf attr
>>>    33 + node attr
>>>    34 + btree attr
>>>    35 + attr extents with a remote less-than-a-block value
>>>    36 + attr extents with a remote one-block value
>>>    37 + empty file
>>>    38 + freesp btree
>>>    39 wrote 4194304/4194304 bytes at offset 0
>>>    40 4 MiB, 4 ops; 0.0941 sec (42.470 MiB/sec and 42.4696 ops/sec)
>>>    41 + inobt btree
>>>    42 + real files
>>>    43 FILL FS
>>>    44 src_sz 2052 fs_sz 8342940 nr 203
>>>    45 failed to create ino 8578 dformat expected btree saw extents
>>>    46 failed to create ino 8578 dformat expected btree saw extents
>>>    47 (see /var/lib/xfstests/results//xfs/083.full for details)
>>>
>>>
>>> It seems this logic can't ensure to creat a btree format dir and it
>>> is a  extent format dir. Or I miss something?
>>>
>>>
>>> Best Regards
>>> Yang Xu
>>
>> Hi Yang,
>>
>> Looks like xfs/083 does call __populate_xfs_create_btree_dir.
> 
> Yes.
>> Could you please share your test environment and config in
>> detail and I will try to reproduce your report.
> 
> Of course, I use fedora31 and 6.1 kernel. xfsprogs uses upstream version 
> xfsprogs: Release v6.0.0.
> 
> local.config
> export TEST_DEV=/dev/sdb8
> export TEST_DIR=/mnt/xfstests/test
> export SCRATCH_DEV=/dev/sdb9
> export SCRATCH_MNT=/mnt/xfstests/scratch
> export XFS_MKFS_OPTIONS="-b size=4096 -m reflink=1"
> 
> 
> disk info:
> /dev/sdb8       566241280 608184319  41943040    20G 83 Linux
> /dev/sdb9       608186368 625142447  16956080   8.1G 83 Linux
> 
> 
> BTW, xfs/273 xfs/495 that called _scratch_populate_cached also failed 
> with this commit under selinux Permissive status and passed under 
> selinux  enforcing status. I guess the extend attr fork was filled
> in selinux enabled status, so we can create btree dir by smaller number 
> files.
> 


Hi Yang,

Could you please try this patch:

diff --git a/common/populate b/common/populate
index 44b4af16..bedcdc41 100644
--- a/common/populate
+++ b/common/populate
@@ -81,7 +81,7 @@ __populate_xfs_create_btree_dir() {
        # btree format.  Cycling the mount to use xfs_db is too slow, so
        # watch for when the extent count exceeds the space after the
        # inode core.
-       local max_nextents="$(((isize - icore_size) / 16))"
+       local max_nextents="$(((isize - icore_size) / 16 + 1))"
        local nr=0
 
        mkdir -p "${name}"

This will add 1 to max_nextents. Then xfs/083 will pass on my env(6.1 kernel,
6.0.0 xfsprogs, selinux disabled)

Regards,
Zhang
Yang Xu (Fujitsu) Jan. 9, 2023, 9:54 a.m. UTC | #5
On 2023/1/3 17:58, xuyang2018.jy@fujitsu.com wrote:
> on  2023/01/03 17:29, Ziyang Zhang wrote

> Hi Yang,

> Could you please try this patch:

> diff --git a/common/populate b/common/populate index 44b4af16..bedcdc41 100644
> --- a/common/populate
> +++ b/common/populate
> @@ -81,7 +81,7 @@ __populate_xfs_create_btree_dir() {
>         # btree format.  Cycling the mount to use xfs_db is too slow, so
>         # watch for when the extent count exceeds the space after the
>         # inode core.
> -       local max_nextents="$(((isize - icore_size) / 16))"
> +       local max_nextents="$(((isize - icore_size) / 16 + 1))"
>         local nr=0
> 
>         mkdir -p "${name}"
>
> This will add 1 to max_nextents. Then xfs/083 will pass on my env(6.1 kernel,
> 6.0.0 xfsprogs, selinux disabled)

Yes, this can solve this problem.


Best Regards
Yang Xu

> Regards,
> Zhang
Ziyang Zhang Jan. 10, 2023, 7:19 a.m. UTC | #6
On 2023/1/3 14:54, xuyang2018.jy@fujitsu.com wrote:
> 
> 
> on 2022/12/12 13:56, Ziyang Zhang wrote:
> 
>> Sometimes "$((128 * dblksz / 40))" dirents cannot make sure that
>> S_IFDIR.FMT_BTREE could become btree format for its DATA fork.
>>
>> Actually we just observed it can fail after apply our inode
>> extent-to-btree workaround. The root cause is that the kernel may be
>> too good at allocating consecutive blocks so that the data fork is
>> still in extents format.
>>
>> Therefore instead of using a fixed number, let's make sure the number
>> of extents is large enough than (inode size - inode core size) /
>> sizeof(xfs_bmbt_rec_t).
> 
> After this patch, xfs/083 and xfs/155 failed on my envrionment(6.1.0+ 
> kernel).
> 
> the 083 fail as below:
> 1 fuzzing xfs with FUZZ_ARGS=-3 -n 32 and FSCK_PASSES=10
>    2 + create scratch fs
>    3 meta-data=/dev/sdb9              isize=512    agcount=4, 
> agsize=529878 blks
>    4          =                       sectsz=512   attr=2, projid32bit=1
>    5          =                       crc=1        finobt=1, sparse=1, 
> rmapbt=0
>    6          =                       reflink=0    bigtime=1 
> inobtcount=1 nrext64=0
>    7 data     =                       bsize=4096   blocks=2119510, 
> imaxpct=25
>    8          =                       sunit=0      swidth=0 blks
>    9 naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
>   10 log      =internal log           bsize=4096   blocks=16384, version=2
>   11          =                       sectsz=512   sunit=0 blks, 
> lazy-count=1
>   12 realtime =none                   extsz=4096   blocks=0, rtextents=0
>   13 + populate fs image
>   14 MOUNT_OPTIONS =  -o usrquota,grpquota,prjquota
>   15 + fill root ino chunk
>   16 + extents file
>   17 wrote 4096/4096 bytes at offset 0
>   18 4 KiB, 1 ops; 0.0187 sec (212.891 KiB/sec and 53.2226 ops/sec)
>   19 + btree extents file
>   20 wrote 2097152/2097152 bytes at offset 0
>   21 2 MiB, 2 ops; 0.0637 sec (31.370 MiB/sec and 31.3701 ops/sec)
>   22 + inline dir
>   23 + block dir
>   24 + leaf dir
>   25 + leafn dir
>   26 + node dir
>   27 + btree dir
>   28 + inline symlink
>   29 + extents symlink
>   30 + special
>   31 + local attr
>   32 + leaf attr
>   33 + node attr
>   34 + btree attr
>   35 + attr extents with a remote less-than-a-block value
>   36 + attr extents with a remote one-block value
>   37 + empty file
>   38 + freesp btree
>   39 wrote 4194304/4194304 bytes at offset 0
>   40 4 MiB, 4 ops; 0.0941 sec (42.470 MiB/sec and 42.4696 ops/sec)
>   41 + inobt btree
>   42 + real files
>   43 FILL FS
>   44 src_sz 2052 fs_sz 8342940 nr 203
>   45 failed to create ino 8578 dformat expected btree saw extents
>   46 failed to create ino 8578 dformat expected btree saw extents
>   47 (see /var/lib/xfstests/results//xfs/083.full for details)
> 
> 
> It seems this logic can't ensure to creat a btree format dir and it
> is a  extent format dir. Or I miss something?
> 
> 
> Best Regards
> Yang Xu

Hi all,

__populate_xfs_create_btree_dir() will delete 50% of the files
after creating all of them if we set "missing" to 1 (true).
This may transform the data fork from BTREE format to EXTENT
format by merging blocks.

Without setting "missing", I find that xfs/083 xfs/155 xfs/273
and xfs/495 pass.

BTW, I have heard that Dave has written allocation speedup code,
and I thank Dave for looking into this as well.

Regards,
Zhang
Yang Xu (Fujitsu) Jan. 10, 2023, 8:56 a.m. UTC | #7
on 2023/1/10 15:19, Ziyang Zhang 写道:
> On 2023/1/3 14:54, xuyang2018.jy@fujitsu.com wrote:
>>
>>
>> on 2022/12/12 13:56, Ziyang Zhang wrote:
>>
>>> Sometimes "$((128 * dblksz / 40))" dirents cannot make sure that
>>> S_IFDIR.FMT_BTREE could become btree format for its DATA fork.
>>>
>>> Actually we just observed it can fail after apply our inode
>>> extent-to-btree workaround. The root cause is that the kernel may be
>>> too good at allocating consecutive blocks so that the data fork is
>>> still in extents format.
>>>
>>> Therefore instead of using a fixed number, let's make sure the number
>>> of extents is large enough than (inode size - inode core size) /
>>> sizeof(xfs_bmbt_rec_t).
>>
>> After this patch, xfs/083 and xfs/155 failed on my envrionment(6.1.0+
>> kernel).
>>
>> the 083 fail as below:
>> 1 fuzzing xfs with FUZZ_ARGS=-3 -n 32 and FSCK_PASSES=10
>>     2 + create scratch fs
>>     3 meta-data=/dev/sdb9              isize=512    agcount=4,
>> agsize=529878 blks
>>     4          =                       sectsz=512   attr=2, projid32bit=1
>>     5          =                       crc=1        finobt=1, sparse=1,
>> rmapbt=0
>>     6          =                       reflink=0    bigtime=1
>> inobtcount=1 nrext64=0
>>     7 data     =                       bsize=4096   blocks=2119510,
>> imaxpct=25
>>     8          =                       sunit=0      swidth=0 blks
>>     9 naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
>>    10 log      =internal log           bsize=4096   blocks=16384, version=2
>>    11          =                       sectsz=512   sunit=0 blks,
>> lazy-count=1
>>    12 realtime =none                   extsz=4096   blocks=0, rtextents=0
>>    13 + populate fs image
>>    14 MOUNT_OPTIONS =  -o usrquota,grpquota,prjquota
>>    15 + fill root ino chunk
>>    16 + extents file
>>    17 wrote 4096/4096 bytes at offset 0
>>    18 4 KiB, 1 ops; 0.0187 sec (212.891 KiB/sec and 53.2226 ops/sec)
>>    19 + btree extents file
>>    20 wrote 2097152/2097152 bytes at offset 0
>>    21 2 MiB, 2 ops; 0.0637 sec (31.370 MiB/sec and 31.3701 ops/sec)
>>    22 + inline dir
>>    23 + block dir
>>    24 + leaf dir
>>    25 + leafn dir
>>    26 + node dir
>>    27 + btree dir
>>    28 + inline symlink
>>    29 + extents symlink
>>    30 + special
>>    31 + local attr
>>    32 + leaf attr
>>    33 + node attr
>>    34 + btree attr
>>    35 + attr extents with a remote less-than-a-block value
>>    36 + attr extents with a remote one-block value
>>    37 + empty file
>>    38 + freesp btree
>>    39 wrote 4194304/4194304 bytes at offset 0
>>    40 4 MiB, 4 ops; 0.0941 sec (42.470 MiB/sec and 42.4696 ops/sec)
>>    41 + inobt btree
>>    42 + real files
>>    43 FILL FS
>>    44 src_sz 2052 fs_sz 8342940 nr 203
>>    45 failed to create ino 8578 dformat expected btree saw extents
>>    46 failed to create ino 8578 dformat expected btree saw extents
>>    47 (see /var/lib/xfstests/results//xfs/083.full for details)
>>
>>
>> It seems this logic can't ensure to creat a btree format dir and it
>> is a  extent format dir. Or I miss something?
>>
>>
>> Best Regards
>> Yang Xu
> 
> Hi all,
> 
> __populate_xfs_create_btree_dir() will delete 50% files
> after creating all the files if we set "missing" to 1(true).
> This may transform the data fork from BTREE format to EXTENT
> format by merging blocks.
> 
> Without setting "missing", I find that xfs/083 xfs/155 xfs/273
> and xfs/495 pass.
> 
> BTW, I have heard that Dave has wrote allocation speedup code
> and thank Dave for looking into this as well.

So, do you plan to send a patch to fix this code?

Best Regards
Yang Xu
> 
> Regards,
> Zhang
> 
>
diff mbox series

Patch

diff --git a/common/populate b/common/populate
index 6e004997..8f7f2113 100644
--- a/common/populate
+++ b/common/populate
@@ -71,6 +71,37 @@  __populate_create_dir() {
 	done
 }
 
+# Create a large directory and ensure that it's a btree format
+__populate_xfs_create_btree_dir() {
+	local name="$1"
+	local isize="$2"
+	local missing="$3"
+	local icore_size="$(_xfs_get_inode_core_bytes $SCRATCH_MNT)"
+	# We need enough extents to guarantee that the data fork is in
+	# btree format.  Cycling the mount to use xfs_db is too slow, so
+	# watch for when the extent count exceeds the space after the
+	# inode core.
+	local max_nextents="$(((isize - icore_size) / 16))"
+	local nr=0
+
+	mkdir -p "${name}"
+	while true; do
+		local creat=mkdir
+		test "$((nr % 20))" -eq 0 && creat=touch
+		$creat "${name}/$(printf "%.08d" "$nr")"
+		if [ "$((nr % 40))" -eq 0 ]; then
+			local nextents="$(_xfs_get_fsxattr nextents $name)"
+			[ $nextents -gt $max_nextents ] && break
+		fi
+		nr=$((nr+1))
+	done
+
+	test -z "${missing}" && return
+	seq 1 2 "${nr}" | while read d; do
+		rm -rf "${name}/$(printf "%.08d" "$d")"
+	done
+}
+
 # Add a bunch of attrs to a file
 __populate_create_attr() {
 	name="$1"
@@ -176,6 +207,7 @@  _scratch_xfs_populate() {
 
 	blksz="$(stat -f -c '%s' "${SCRATCH_MNT}")"
 	dblksz="$(_xfs_get_dir_blocksize "$SCRATCH_MNT")"
+	isize="$(_xfs_get_inode_size "$SCRATCH_MNT")"
 	crc="$(_xfs_has_feature "$SCRATCH_MNT" crc -v)"
 	if [ $crc -eq 1 ]; then
 		leaf_hdr_size=64
@@ -226,7 +258,7 @@  _scratch_xfs_populate() {
 
 	# - BTREE
 	echo "+ btree dir"
-	__populate_create_dir "${SCRATCH_MNT}/S_IFDIR.FMT_BTREE" "$((128 * dblksz / 40))" true
+	__populate_xfs_create_btree_dir "${SCRATCH_MNT}/S_IFDIR.FMT_BTREE" "$isize" true
 
 	# Symlinks
 	# - FMT_LOCAL
diff --git a/common/xfs b/common/xfs
index 674384a9..7aaa63c7 100644
--- a/common/xfs
+++ b/common/xfs
@@ -1487,6 +1487,15 @@  _require_xfsrestore_xflag()
 			_notrun 'xfsrestore does not support -x flag.'
 }
 
+# Number of bytes reserved for a full inode record, which includes the
+# immediate fork areas.
+_xfs_get_inode_size()
+{
+	local mntpoint="$1"
+
+	$XFS_INFO_PROG "$mntpoint" | sed -n '/meta-data=.*isize/s/^.*isize=\([0-9]*\).*$/\1/p'
+}
+
 # Number of bytes reserved for only the inode record, excluding the
 # immediate fork areas.
 _xfs_get_inode_core_bytes()