diff mbox series

[V6,05/13] xfs: Check for extent overflow when growing realtime bitmap/summary inodes

Message ID 20210309050124.23797-6-chandanrlinux@gmail.com (mailing list archive)
State Accepted
Headers show
Series xfs: Tests to verify inode fork extent count overflow detection | expand

Commit Message

Chandan Babu R March 9, 2021, 5:01 a.m. UTC
Verify that XFS does not cause realtime bitmap/summary inode fork's
extent count to overflow when growing the realtime volume associated
with a filesystem.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com>
---
 tests/xfs/529     | 124 ++++++++++++++++++++++++++++++++++++++++++++++
 tests/xfs/529.out |  11 ++++
 tests/xfs/group   |   1 +
 3 files changed, 136 insertions(+)
 create mode 100755 tests/xfs/529
 create mode 100644 tests/xfs/529.out

Comments

Allison Henderson March 10, 2021, 7:55 p.m. UTC | #1
On 3/8/21 10:01 PM, Chandan Babu R wrote:
> Verify that XFS does not cause realtime bitmap/summary inode fork's
> extent count to overflow when growing the realtime volume associated
> with a filesystem.
> 
> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com>
Ok, makes sense
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>

> ---
>   tests/xfs/529     | 124 ++++++++++++++++++++++++++++++++++++++++++++++
>   tests/xfs/529.out |  11 ++++
>   tests/xfs/group   |   1 +
>   3 files changed, 136 insertions(+)
>   create mode 100755 tests/xfs/529
>   create mode 100644 tests/xfs/529.out
> 
> diff --git a/tests/xfs/529 b/tests/xfs/529
> new file mode 100755
> index 00000000..dd7019f5
> --- /dev/null
> +++ b/tests/xfs/529
> @@ -0,0 +1,124 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (c) 2021 Chandan Babu R.  All Rights Reserved.
> +#
> +# FS QA Test 529
> +#
> +# Verify that XFS does not cause bitmap/summary inode fork's extent count to
> +# overflow when growing an the realtime volume of the filesystem.
> +#
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1	# failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> +	cd /
> +	_scratch_unmount >> $seqres.full 2>&1
> +	test -e "$rtdev" && losetup -d $rtdev >> $seqres.full 2>&1
> +	rm -f $tmp.* $TEST_DIR/$seq.rtvol
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +. ./common/inject
> +. ./common/populate
> +
> +# remove previous $seqres.full before test
> +rm -f $seqres.full
> +
> +# real QA test starts here
> +
> +_supported_fs xfs
> +# Note that we don't _require_realtime because we synthesize a rt volume
> +# below.
> +_require_test
> +_require_xfs_debug
> +_require_test_program "punch-alternating"
> +_require_xfs_io_error_injection "reduce_max_iextents"
> +_require_xfs_io_error_injection "bmap_alloc_minlen_extent"
> +_require_scratch_nocheck
> +
> +echo "* Test extending rt inodes"
> +
> +_scratch_mkfs | _filter_mkfs >> $seqres.full 2> $tmp.mkfs
> +. $tmp.mkfs
> +
> +echo "Create fake rt volume"
> +nr_bitmap_blks=25
> +nr_bits=$((nr_bitmap_blks * dbsize * 8))
> +
> +# Realtime extent size has to be atleast 4k in size.
> +if (( $dbsize < 4096 )); then
> +	rtextsz=4096
> +else
> +	rtextsz=$dbsize
> +fi
> +
> +rtdevsz=$((nr_bits * rtextsz))
> +truncate -s $rtdevsz $TEST_DIR/$seq.rtvol
> +rtdev=$(_create_loop_device $TEST_DIR/$seq.rtvol)
> +
> +echo "Format and mount rt volume"
> +
> +export USE_EXTERNAL=yes
> +export SCRATCH_RTDEV=$rtdev
> +_scratch_mkfs -d size=$((1024 * 1024 * 1024)) -b size=${dbsize} \
> +	      -r size=${rtextsz},extsize=${rtextsz} >> $seqres.full
> +_try_scratch_mount || _notrun "Couldn't mount fs with synthetic rt volume"
> +
> +echo "Consume free space"
> +fillerdir=$SCRATCH_MNT/fillerdir
> +nr_free_blks=$(stat -f -c '%f' $SCRATCH_MNT)
> +nr_free_blks=$((nr_free_blks * 90 / 100))
> +
> +_fill_fs $((dbsize * nr_free_blks)) $fillerdir $dbsize 0 >> $seqres.full 2>&1
> +
> +echo "Create fragmented filesystem"
> +for dentry in $(ls -1 $fillerdir/); do
> +	$here/src/punch-alternating $fillerdir/$dentry >> $seqres.full
> +done
> +
> +echo "Inject reduce_max_iextents error tag"
> +_scratch_inject_error reduce_max_iextents 1
> +
> +echo "Inject bmap_alloc_minlen_extent error tag"
> +_scratch_inject_error bmap_alloc_minlen_extent 1
> +
> +echo "Grow realtime volume"
> +$XFS_GROWFS_PROG -r $SCRATCH_MNT >> $seqres.full 2>&1
> +if [[ $? == 0 ]]; then
> +	echo "Growfs succeeded; should have failed."
> +	exit 1
> +fi
> +
> +_scratch_unmount >> $seqres.full
> +
> +echo "Verify rbmino's and rsumino's extent count"
> +for rtino in rbmino rsumino; do
> +	ino=$(_scratch_xfs_get_metadata_field $rtino "sb 0")
> +	echo "$rtino = $ino" >> $seqres.full
> +
> +	nextents=$(_scratch_get_iext_count $ino data || \
> +			_fail "Unable to obtain inode fork's extent count")
> +	if (( $nextents > 10 )); then
> +		echo "Extent count overflow check failed: nextents = $nextents"
> +		exit 1
> +	fi
> +done
> +
> +echo "Check filesystem"
> +_check_xfs_filesystem $SCRATCH_DEV none $rtdev
> +
> +losetup -d $rtdev
> +rm -f $TEST_DIR/$seq.rtvol
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/xfs/529.out b/tests/xfs/529.out
> new file mode 100644
> index 00000000..4ee113a4
> --- /dev/null
> +++ b/tests/xfs/529.out
> @@ -0,0 +1,11 @@
> +QA output created by 529
> +* Test extending rt inodes
> +Create fake rt volume
> +Format and mount rt volume
> +Consume free space
> +Create fragmented filesystem
> +Inject reduce_max_iextents error tag
> +Inject bmap_alloc_minlen_extent error tag
> +Grow realtime volume
> +Verify rbmino's and rsumino's extent count
> +Check filesystem
> diff --git a/tests/xfs/group b/tests/xfs/group
> index 2356c4a9..5dff7acb 100644
> --- a/tests/xfs/group
> +++ b/tests/xfs/group
> @@ -526,3 +526,4 @@
>   526 auto quick mkfs
>   527 auto quick quota
>   528 auto quick quota
> +529 auto quick realtime growfs
>
Darrick J. Wong March 22, 2021, 5:56 p.m. UTC | #2
On Tue, Mar 09, 2021 at 10:31:16AM +0530, Chandan Babu R wrote:
> Verify that XFS does not cause realtime bitmap/summary inode fork's
> extent count to overflow when growing the realtime volume associated
> with a filesystem.
> 
> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com>

Soo... I discovered that this test doesn't pass with multiblock
directories:

FSTYP         -- xfs (debug)
PLATFORM      -- Linux/x86_64 alder-mtr00 5.12.0-rc4-xfsx #rc4 SMP PREEMPT Mon Mar 22 10:03:45 PDT 2021
MKFS_OPTIONS  -- -f -b size=1024, /dev/sdf
MOUNT_OPTIONS -- -o usrquota,grpquota,prjquota, /dev/sdf /opt

xfs/529 - output mismatch (see /var/tmp/fstests/xfs/529.out.bad)
    --- tests/xfs/529.out       2021-03-21 11:44:09.383407733 -0700
    +++ /var/tmp/fstests/xfs/529.out.bad        2021-03-22 10:36:34.000348426 -0700
    @@ -4,12 +4,21 @@
     Inject reduce_max_iextents error tag
     Create fragmented file
     Verify $testfile's extent count
    +/opt/testfile: No such file or directory
    +/tmp/fstests/tests/xfs/529: line 72: ((: > 10 : syntax error: operand expected (error token is "> 10 ")
    +rm: cannot remove '/opt/testfile': No such file or directory
     * Fallocate unwritten extents
    ...
    (Run 'diff -u /tmp/fstests/tests/xfs/529.out /var/tmp/fstests/xfs/529.out.bad'  to see the entire diff)
Ran: xfs/529
Failures: xfs/529
Failed 1 of 1 tests

Test xfs/529 FAILED with code 1 and bad golden output:
--- /tmp/fstests/tests/xfs/529.out      2021-03-21 11:44:09.383407733 -0700
+++ /var/tmp/fstests/xfs/529.out.bad    2021-03-22 10:36:34.000348426 -0700
@@ -4,12 +4,21 @@
 Inject reduce_max_iextents error tag
 Create fragmented file
 Verify $testfile's extent count
+/opt/testfile: No such file or directory
+/tmp/fstests/tests/xfs/529: line 72: ((: > 10 : syntax error: operand expected (error token is "> 10 ")
+rm: cannot remove '/opt/testfile': No such file or directory
 * Fallocate unwritten extents
 Fallocate fragmented file
 Verify $testfile's extent count
+/opt/testfile: No such file or directory
+/tmp/fstests/tests/xfs/529: line 91: ((: > 10 : syntax error: operand expected (error token is "> 10 ")
+rm: cannot remove '/opt/testfile': No such file or directory
 * Directio write
 Create fragmented file via directio writes
 Verify $testfile's extent count
+/opt/testfile: No such file or directory
+/tmp/fstests/tests/xfs/529: line 110: ((: > 10 : syntax error: operand expected (error token is "> 10 ")
+rm: cannot remove '/opt/testfile': No such file or directory
 * Extend quota inodes
 Disable reduce_max_iextents error tag
 Consume free space

The test appears to fail because we cannot create even a single file in
the root directory.  Looking at xfs_create, I see:

	error = xfs_iext_count_may_overflow(dp, XFS_DATA_FORK,
			XFS_IEXT_DIR_MANIP_CNT(mp));
	if (error)
		goto out_trans_cancel;

XFS_IEXT_DIR_MANIP_CNT is defined as:

	#define XFS_IEXT_DIR_MANIP_CNT(mp) \
		((XFS_DA_NODE_MAXDEPTH + 1 + 1) * (mp)->m_dir_geo->fsbcount)

If one formats a filesystem with 1k blocks, the result will be a
filesystem with 4k directory blocks:

# mkfs.xfs -b size=1024 /dev/sdf -Nf
meta-data=/dev/sdf               isize=512    agcount=4, agsize=5192704 blks
         =                       sectsz=512   attr=2, projid32bit=1
         =                       crc=1        finobt=1, sparse=1, rmapbt=1
         =                       reflink=1    bigtime=1 inobtcount=1
         =                       metadir=0   
data     =                       bsize=1024   blocks=20770816, imaxpct=25
         =                       sunit=0      swidth=0 blks
naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
log      =internal log           bsize=1024   blocks=10240, version=2
         =                       sectsz=512   sunit=0 blks, lazy-count=1
realtime =none                   extsz=4096   blocks=0, rtextents=0

Note "data bsize" is 1024, and "naming bsize" is 4096.

In the kernel, we set m_dir_geo->fsbcount = "naming bsize" /
"data bsize", or 4 in this case.  Since XFS_DA_NODE_MAXDEPTH is always
5, this macro expands to:

	(5 + 1 + 1) * (4) = 28

The reason for the test failure I think is because of this code in
xfs_iext_count_may_overflow, which is called from xfs_create on the
parent directory:

	if (XFS_TEST_ERROR(false, ip->i_mount, XFS_ERRTAG_REDUCE_MAX_IEXTENTS))
		max_exts = 10;

	nr_exts = ifp->if_nextents + nr_to_add;
	if (nr_exts < ifp->if_nextents || nr_exts > max_exts)
		return -EFBIG;

The second part of the if statement becomes (28 > 10) which is trivially
true, so we return -EFBIG for all attempts to create a file in a
directory.  xfs/529, in turn, cannot create $testfile because nothing
can create a file in $SCRATCH_MNT, and the test goes off the rails.

I think this can be trivially solved by changing this (and the other
tests) to ensure that the error injection is only set when we're running
a command to check if we get EFBIG.  In other words, this code in
xfs/529:

	rm $testfile

	echo "* Fallocate unwritten extents"

	echo "Fallocate fragmented file"
	for i in $(seq 0 2 $((nr_blks - 1))); do
		$XFS_IO_PROG -f -c "falloc $((i * bsize)) $bsize" $testfile \
		       >> $seqres.full 2>&1
		[[ $? != 0 ]] && break
	done

Should become:

	rm -f $testfile
	touch $testfile

	echo "* Fallocate unwritten extents"

	echo "Fallocate fragmented file"
	_scratch_inject_error reduce_max_iextents 1
	for i in $(seq 0 2 $((nr_blks - 1))); do
		$XFS_IO_PROG -c "falloc $((i * bsize)) $bsize" $testfile \
		       >> $seqres.full 2>&1
		[[ $? != 0 ]] && break
	done
	_scratch_inject_error reduce_max_iextents 0

With that patched up, xfs/529 passes on 1k block filesystems.  I suspect
the other tests in this series (xfs/531, 532, 534, and 535) are going to
need similar patching.

--D

> ---
>  tests/xfs/529     | 124 ++++++++++++++++++++++++++++++++++++++++++++++
>  tests/xfs/529.out |  11 ++++
>  tests/xfs/group   |   1 +
>  3 files changed, 136 insertions(+)
>  create mode 100755 tests/xfs/529
>  create mode 100644 tests/xfs/529.out
> 
> diff --git a/tests/xfs/529 b/tests/xfs/529
> new file mode 100755
> index 00000000..dd7019f5
> --- /dev/null
> +++ b/tests/xfs/529
> @@ -0,0 +1,124 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (c) 2021 Chandan Babu R.  All Rights Reserved.
> +#
> +# FS QA Test 529
> +#
> +# Verify that XFS does not cause bitmap/summary inode fork's extent count to
> +# overflow when growing an the realtime volume of the filesystem.
> +#
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1	# failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> +	cd /
> +	_scratch_unmount >> $seqres.full 2>&1
> +	test -e "$rtdev" && losetup -d $rtdev >> $seqres.full 2>&1
> +	rm -f $tmp.* $TEST_DIR/$seq.rtvol
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +. ./common/inject
> +. ./common/populate
> +
> +# remove previous $seqres.full before test
> +rm -f $seqres.full
> +
> +# real QA test starts here
> +
> +_supported_fs xfs
> +# Note that we don't _require_realtime because we synthesize a rt volume
> +# below.
> +_require_test
> +_require_xfs_debug
> +_require_test_program "punch-alternating"
> +_require_xfs_io_error_injection "reduce_max_iextents"
> +_require_xfs_io_error_injection "bmap_alloc_minlen_extent"
> +_require_scratch_nocheck
> +
> +echo "* Test extending rt inodes"
> +
> +_scratch_mkfs | _filter_mkfs >> $seqres.full 2> $tmp.mkfs
> +. $tmp.mkfs
> +
> +echo "Create fake rt volume"
> +nr_bitmap_blks=25
> +nr_bits=$((nr_bitmap_blks * dbsize * 8))
> +
> +# Realtime extent size has to be atleast 4k in size.
> +if (( $dbsize < 4096 )); then
> +	rtextsz=4096
> +else
> +	rtextsz=$dbsize
> +fi
> +
> +rtdevsz=$((nr_bits * rtextsz))
> +truncate -s $rtdevsz $TEST_DIR/$seq.rtvol
> +rtdev=$(_create_loop_device $TEST_DIR/$seq.rtvol)
> +
> +echo "Format and mount rt volume"
> +
> +export USE_EXTERNAL=yes
> +export SCRATCH_RTDEV=$rtdev
> +_scratch_mkfs -d size=$((1024 * 1024 * 1024)) -b size=${dbsize} \
> +	      -r size=${rtextsz},extsize=${rtextsz} >> $seqres.full
> +_try_scratch_mount || _notrun "Couldn't mount fs with synthetic rt volume"
> +
> +echo "Consume free space"
> +fillerdir=$SCRATCH_MNT/fillerdir
> +nr_free_blks=$(stat -f -c '%f' $SCRATCH_MNT)
> +nr_free_blks=$((nr_free_blks * 90 / 100))
> +
> +_fill_fs $((dbsize * nr_free_blks)) $fillerdir $dbsize 0 >> $seqres.full 2>&1
> +
> +echo "Create fragmented filesystem"
> +for dentry in $(ls -1 $fillerdir/); do
> +	$here/src/punch-alternating $fillerdir/$dentry >> $seqres.full
> +done
> +
> +echo "Inject reduce_max_iextents error tag"
> +_scratch_inject_error reduce_max_iextents 1
> +
> +echo "Inject bmap_alloc_minlen_extent error tag"
> +_scratch_inject_error bmap_alloc_minlen_extent 1
> +
> +echo "Grow realtime volume"
> +$XFS_GROWFS_PROG -r $SCRATCH_MNT >> $seqres.full 2>&1
> +if [[ $? == 0 ]]; then
> +	echo "Growfs succeeded; should have failed."
> +	exit 1
> +fi
> +
> +_scratch_unmount >> $seqres.full
> +
> +echo "Verify rbmino's and rsumino's extent count"
> +for rtino in rbmino rsumino; do
> +	ino=$(_scratch_xfs_get_metadata_field $rtino "sb 0")
> +	echo "$rtino = $ino" >> $seqres.full
> +
> +	nextents=$(_scratch_get_iext_count $ino data || \
> +			_fail "Unable to obtain inode fork's extent count")
> +	if (( $nextents > 10 )); then
> +		echo "Extent count overflow check failed: nextents = $nextents"
> +		exit 1
> +	fi
> +done
> +
> +echo "Check filesystem"
> +_check_xfs_filesystem $SCRATCH_DEV none $rtdev
> +
> +losetup -d $rtdev
> +rm -f $TEST_DIR/$seq.rtvol
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/xfs/529.out b/tests/xfs/529.out
> new file mode 100644
> index 00000000..4ee113a4
> --- /dev/null
> +++ b/tests/xfs/529.out
> @@ -0,0 +1,11 @@
> +QA output created by 529
> +* Test extending rt inodes
> +Create fake rt volume
> +Format and mount rt volume
> +Consume free space
> +Create fragmented filesystem
> +Inject reduce_max_iextents error tag
> +Inject bmap_alloc_minlen_extent error tag
> +Grow realtime volume
> +Verify rbmino's and rsumino's extent count
> +Check filesystem
> diff --git a/tests/xfs/group b/tests/xfs/group
> index 2356c4a9..5dff7acb 100644
> --- a/tests/xfs/group
> +++ b/tests/xfs/group
> @@ -526,3 +526,4 @@
>  526 auto quick mkfs
>  527 auto quick quota
>  528 auto quick quota
> +529 auto quick realtime growfs
> -- 
> 2.29.2
>
Chandan Babu R March 23, 2021, 3:51 p.m. UTC | #3
On 22 Mar 2021 at 23:26, Darrick J. Wong wrote:
> On Tue, Mar 09, 2021 at 10:31:16AM +0530, Chandan Babu R wrote:
>> Verify that XFS does not cause realtime bitmap/summary inode fork's
>> extent count to overflow when growing the realtime volume associated
>> with a filesystem.
>>
>> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
>> Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com>
>
> Soo... I discovered that this test doesn't pass with multiblock
> directories:

Thanks for the bug report and the description of the corresponding solution. I
am fixing the tests and will soon post corresponding patches to the mailing
list.

--
chandan
Darrick J. Wong March 23, 2021, 8:57 p.m. UTC | #4
On Tue, Mar 23, 2021 at 09:21:27PM +0530, Chandan Babu R wrote:
> On 22 Mar 2021 at 23:26, Darrick J. Wong wrote:
> > On Tue, Mar 09, 2021 at 10:31:16AM +0530, Chandan Babu R wrote:
> >> Verify that XFS does not cause realtime bitmap/summary inode fork's
> >> extent count to overflow when growing the realtime volume associated
> >> with a filesystem.
> >>
> >> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> >> Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com>
> >
> > Soo... I discovered that this test doesn't pass with multiblock
> > directories:
> 
> Thanks for the bug report and the description of the corresponding solution. I
> am fixing the tests and will soon post corresponding patches to the mailing
> list.

Also, I found a problem with xfs/534 when it does the direct write tests
to a pmem volume with DAX enabled:

--- /tmp/fstests/tests/xfs/534.out      2021-03-21 11:44:09.384407426 -0700
+++ /var/tmp/fstests/xfs/534.out.bad    2021-03-23 13:32:15.898301839 -0700
@@ -5,7 +5,4 @@
 Fallocate 15 blocks
 Buffered write to every other block of fallocated space
 Verify $testfile's extent count
-* Direct write to unwritten extent
-Fallocate 15 blocks
-Direct write to every other block of fallocated space
-Verify $testfile's extent count
+Extent count overflow check failed: nextents = 11

looking at the xfs_bmap output for $testfile shows:

/opt/testfile:
 EXT: FILE-OFFSET      BLOCK-RANGE      AG AG-OFFSET        TOTAL FLAGS
   0: [0..7]:          208..215          0 (208..215)           8 010000
   1: [8..15]:         216..223          0 (216..223)           8 000000
   2: [16..23]:        224..231          0 (224..231)           8 010000
   3: [24..31]:        232..239          0 (232..239)           8 000000
   4: [32..39]:        240..247          0 (240..247)           8 010000
   5: [40..47]:        248..255          0 (248..255)           8 000000
   6: [48..55]:        256..263          0 (256..263)           8 010000
   7: [56..63]:        264..271          0 (264..271)           8 000000
   8: [64..71]:        272..279          0 (272..279)           8 010000
   9: [72..79]:        280..287          0 (280..287)           8 000000
  10: [80..119]:       288..327          0 (288..327)          40 010000

Which is ... odd since the same direct write gets cut off after writing
to block 7 (like you'd expect since it's the same function) when DAX
isn't enabled...

...OH, I see the problem.  For a non-DAX direct write,
xfs_iomap_write_direct will allocate an unwritten block into a hole, but
if the block was already mapped (written or unwritten) it won't do
anything at all.  For that case, XFS_IEXT_ADD_NOSPLIT_CNT is sufficient,
because in the worst case we add one extent to the data fork.

For DAX writes, however, the behavior is different:

	if (IS_DAX(VFS_I(ip))) {
		bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
		if (imap->br_state == XFS_EXT_UNWRITTEN) {
			force = true;
			dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
		}
	}

This tells xfs_bmapi_write that we want to /convert/ an unwritten extent
to written, and we want to zero the blocks.  If we're dax-writing into
the middle of an unwritten range, this will cause a split.  The correct
parameter there would be XFS_IEXT_WRITE_UNWRITTEN_CNT.  Would you mind
sending a kernel patch to fix that?

--D

> --
> chandan
Chandan Babu R March 24, 2021, 10:46 a.m. UTC | #5
On 24 Mar 2021 at 02:27, Darrick J. Wong wrote:
> On Tue, Mar 23, 2021 at 09:21:27PM +0530, Chandan Babu R wrote:
>> On 22 Mar 2021 at 23:26, Darrick J. Wong wrote:
>> > On Tue, Mar 09, 2021 at 10:31:16AM +0530, Chandan Babu R wrote:
>> >> Verify that XFS does not cause realtime bitmap/summary inode fork's
>> >> extent count to overflow when growing the realtime volume associated
>> >> with a filesystem.
>> >>
>> >> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
>> >> Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com>
>> >
>> > Soo... I discovered that this test doesn't pass with multiblock
>> > directories:
>>
>> Thanks for the bug report and the description of the corresponding solution. I
>> am fixing the tests and will soon post corresponding patches to the mailing
>> list.
>
> Also, I found a problem with xfs/534 when it does the direct write tests
> to a pmem volume with DAX enabled:
>
> --- /tmp/fstests/tests/xfs/534.out      2021-03-21 11:44:09.384407426 -0700
> +++ /var/tmp/fstests/xfs/534.out.bad    2021-03-23 13:32:15.898301839 -0700
> @@ -5,7 +5,4 @@
>  Fallocate 15 blocks
>  Buffered write to every other block of fallocated space
>  Verify $testfile's extent count
> -* Direct write to unwritten extent
> -Fallocate 15 blocks
> -Direct write to every other block of fallocated space
> -Verify $testfile's extent count
> +Extent count overflow check failed: nextents = 11

The inode extent overflow reported above was actually due to the buffered
write operation. But it does occur with direct write operation as well.

I was able to recreate the bug with an emulated pmem device on my qemu guest.

>
> looking at the xfs_bmap output for $testfile shows:
>
> /opt/testfile:
>  EXT: FILE-OFFSET      BLOCK-RANGE      AG AG-OFFSET        TOTAL FLAGS
>    0: [0..7]:          208..215          0 (208..215)           8 010000
>    1: [8..15]:         216..223          0 (216..223)           8 000000
>    2: [16..23]:        224..231          0 (224..231)           8 010000
>    3: [24..31]:        232..239          0 (232..239)           8 000000
>    4: [32..39]:        240..247          0 (240..247)           8 010000
>    5: [40..47]:        248..255          0 (248..255)           8 000000
>    6: [48..55]:        256..263          0 (256..263)           8 010000
>    7: [56..63]:        264..271          0 (264..271)           8 000000
>    8: [64..71]:        272..279          0 (272..279)           8 010000
>    9: [72..79]:        280..287          0 (280..287)           8 000000
>   10: [80..119]:       288..327          0 (288..327)          40 010000
>
> Which is ... odd since the same direct write gets cut off after writing
> to block 7 (like you'd expect since it's the same function) when DAX
> isn't enabled...
>
> ...OH, I see the problem.  For a non-DAX direct write,
> xfs_iomap_write_direct will allocate an unwritten block into a hole, but
> if the block was already mapped (written or unwritten) it won't do
> anything at all.  For that case, XFS_IEXT_ADD_NOSPLIT_CNT is sufficient,
> because in the worst case we add one extent to the data fork.
>
> For DAX writes, however, the behavior is different:
>
> 	if (IS_DAX(VFS_I(ip))) {
> 		bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
> 		if (imap->br_state == XFS_EXT_UNWRITTEN) {
> 			force = true;
> 			dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
> 		}
> 	}
>
> This tells xfs_bmapi_write that we want to /convert/ an unwritten extent
> to written, and we want to zero the blocks.  If we're dax-writing into
> the middle of an unwritten range, this will cause a split.  The correct
> parameter there would be XFS_IEXT_WRITE_UNWRITTEN_CNT.  Would you mind
> sending a kernel patch to fix that?

Sure, I will work on fixing both the buffered and direct IO extent overflow
issues.

Thanks for reporting the bug.

--
chandan
Chandan Babu R March 24, 2021, 2:17 p.m. UTC | #6
On 24 Mar 2021 at 16:16, Chandan Babu R wrote:
> On 24 Mar 2021 at 02:27, Darrick J. Wong wrote:
>> On Tue, Mar 23, 2021 at 09:21:27PM +0530, Chandan Babu R wrote:
>>> On 22 Mar 2021 at 23:26, Darrick J. Wong wrote:
>>> > On Tue, Mar 09, 2021 at 10:31:16AM +0530, Chandan Babu R wrote:
>>> >> Verify that XFS does not cause realtime bitmap/summary inode fork's
>>> >> extent count to overflow when growing the realtime volume associated
>>> >> with a filesystem.
>>> >>
>>> >> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
>>> >> Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com>
>>> >
>>> > Soo... I discovered that this test doesn't pass with multiblock
>>> > directories:
>>>
>>> Thanks for the bug report and the description of the corresponding solution. I
>>> am fixing the tests and will soon post corresponding patches to the mailing
>>> list.
>>
>> Also, I found a problem with xfs/534 when it does the direct write tests
>> to a pmem volume with DAX enabled:
>>
>> --- /tmp/fstests/tests/xfs/534.out      2021-03-21 11:44:09.384407426 -0700
>> +++ /var/tmp/fstests/xfs/534.out.bad    2021-03-23 13:32:15.898301839 -0700
>> @@ -5,7 +5,4 @@
>>  Fallocate 15 blocks
>>  Buffered write to every other block of fallocated space
>>  Verify $testfile's extent count
>> -* Direct write to unwritten extent
>> -Fallocate 15 blocks
>> -Direct write to every other block of fallocated space
>> -Verify $testfile's extent count
>> +Extent count overflow check failed: nextents = 11
>
> The inode extent overflow reported above was actually due to the buffered
> write operation. But it does occur with direct write operation as well.

I just found out that xfs_direct_write_iomap_ops is used for both buffered and
direct IO w.r.t dax devices. Please ignore the above statement.

--
chandan
diff mbox series

Patch

diff --git a/tests/xfs/529 b/tests/xfs/529
new file mode 100755
index 00000000..dd7019f5
--- /dev/null
+++ b/tests/xfs/529
@@ -0,0 +1,124 @@ 
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2021 Chandan Babu R.  All Rights Reserved.
+#
+# FS QA Test 529
+#
+# Verify that XFS does not cause bitmap/summary inode fork's extent count to
+# overflow when growing the realtime volume of the filesystem.
+#
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+	cd /
+	_scratch_unmount >> $seqres.full 2>&1
+	test -e "$rtdev" && losetup -d $rtdev >> $seqres.full 2>&1
+	rm -f $tmp.* $TEST_DIR/$seq.rtvol
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+. ./common/inject
+. ./common/populate
+
+# remove previous $seqres.full before test
+rm -f $seqres.full
+
+# real QA test starts here
+
+_supported_fs xfs
+# Note that we don't _require_realtime because we synthesize a rt volume
+# below.
+_require_test
+_require_xfs_debug
+_require_test_program "punch-alternating"
+_require_xfs_io_error_injection "reduce_max_iextents"
+_require_xfs_io_error_injection "bmap_alloc_minlen_extent"
+_require_scratch_nocheck
+
+echo "* Test extending rt inodes"
+
+_scratch_mkfs | _filter_mkfs >> $seqres.full 2> $tmp.mkfs
+. $tmp.mkfs
+
+echo "Create fake rt volume"
+nr_bitmap_blks=25
+nr_bits=$((nr_bitmap_blks * dbsize * 8))
+
+# Realtime extent size has to be at least 4k in size.
+if (( $dbsize < 4096 )); then
+	rtextsz=4096
+else
+	rtextsz=$dbsize
+fi
+
+rtdevsz=$((nr_bits * rtextsz))
+truncate -s $rtdevsz $TEST_DIR/$seq.rtvol
+rtdev=$(_create_loop_device $TEST_DIR/$seq.rtvol)
+
+echo "Format and mount rt volume"
+
+export USE_EXTERNAL=yes
+export SCRATCH_RTDEV=$rtdev
+_scratch_mkfs -d size=$((1024 * 1024 * 1024)) -b size=${dbsize} \
+	      -r size=${rtextsz},extsize=${rtextsz} >> $seqres.full
+_try_scratch_mount || _notrun "Couldn't mount fs with synthetic rt volume"
+
+echo "Consume free space"
+fillerdir=$SCRATCH_MNT/fillerdir
+nr_free_blks=$(stat -f -c '%f' $SCRATCH_MNT)
+nr_free_blks=$((nr_free_blks * 90 / 100))
+
+_fill_fs $((dbsize * nr_free_blks)) $fillerdir $dbsize 0 >> $seqres.full 2>&1
+
+echo "Create fragmented filesystem"
+for dentry in $(ls -1 $fillerdir/); do
+	$here/src/punch-alternating $fillerdir/$dentry >> $seqres.full
+done
+
+echo "Inject reduce_max_iextents error tag"
+_scratch_inject_error reduce_max_iextents 1
+
+echo "Inject bmap_alloc_minlen_extent error tag"
+_scratch_inject_error bmap_alloc_minlen_extent 1
+
+echo "Grow realtime volume"
+$XFS_GROWFS_PROG -r $SCRATCH_MNT >> $seqres.full 2>&1
+if [[ $? == 0 ]]; then
+	echo "Growfs succeeded; should have failed."
+	exit 1
+fi
+
+_scratch_unmount >> $seqres.full
+
+echo "Verify rbmino's and rsumino's extent count"
+for rtino in rbmino rsumino; do
+	ino=$(_scratch_xfs_get_metadata_field $rtino "sb 0")
+	echo "$rtino = $ino" >> $seqres.full
+
+	nextents=$(_scratch_get_iext_count $ino data || \
+			_fail "Unable to obtain inode fork's extent count")
+	if (( $nextents > 10 )); then
+		echo "Extent count overflow check failed: nextents = $nextents"
+		exit 1
+	fi
+done
+
+echo "Check filesystem"
+_check_xfs_filesystem $SCRATCH_DEV none $rtdev
+
+losetup -d $rtdev
+rm -f $TEST_DIR/$seq.rtvol
+
+# success, all done
+status=0
+exit
diff --git a/tests/xfs/529.out b/tests/xfs/529.out
new file mode 100644
index 00000000..4ee113a4
--- /dev/null
+++ b/tests/xfs/529.out
@@ -0,0 +1,11 @@ 
+QA output created by 529
+* Test extending rt inodes
+Create fake rt volume
+Format and mount rt volume
+Consume free space
+Create fragmented filesystem
+Inject reduce_max_iextents error tag
+Inject bmap_alloc_minlen_extent error tag
+Grow realtime volume
+Verify rbmino's and rsumino's extent count
+Check filesystem
diff --git a/tests/xfs/group b/tests/xfs/group
index 2356c4a9..5dff7acb 100644
--- a/tests/xfs/group
+++ b/tests/xfs/group
@@ -526,3 +526,4 @@ 
 526 auto quick mkfs
 527 auto quick quota
 528 auto quick quota
+529 auto quick realtime growfs