diff mbox series

generic: test which tries to exercise AIO/DIO into unwritten space

Message ID 20210210205818.1494305-1-tytso@mit.edu (mailing list archive)
State New, archived
Headers show
Series generic: test which tries to exercise AIO/DIO into unwritten space | expand

Commit Message

Theodore Ts'o Feb. 10, 2021, 8:58 p.m. UTC
This test verifies that the an unwritten extent is properly marked as
written after writing into it.

There was a hard-to-hit bug which would occasionally trigger with ext4
for which this test was a reproducer.  This has been fixed after
moving ext4 to use iomap for Direct I/O's, although as of this
writing, there are still some occasional failures on ext4 when block
size < page size.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 tests/generic/623     | 109 ++++++++++++++++++++++++++++++++++++++++++
 tests/generic/623.out |   4 ++
 tests/generic/group   |   1 +
 3 files changed, 114 insertions(+)
 create mode 100755 tests/generic/623
 create mode 100644 tests/generic/623.out

Comments

Theodore Ts'o March 1, 2021, 5:02 p.m. UTC | #1
On Wed, Feb 10, 2021 at 03:58:18PM -0500, Theodore Ts'o wrote:
> This test verifies that the an unwritten extent is properly marked as
> written after writing into it.
> 
> There was a hard-to-hit bug which would occasionally trigger with ext4
> for which this test was a reproducer.  This has been fixed after
> moving ext4 to use iomap for Direct I/O's, although as of this
> writing, there are still some occasional failures on ext4 when block
> size < page size.
> 
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>

Ping?

> ---
>  tests/generic/623     | 109 ++++++++++++++++++++++++++++++++++++++++++
>  tests/generic/623.out |   4 ++
>  tests/generic/group   |   1 +
>  3 files changed, 114 insertions(+)
>  create mode 100755 tests/generic/623
>  create mode 100644 tests/generic/623.out
> 
> diff --git a/tests/generic/623 b/tests/generic/623
> new file mode 100755
> index 00000000..74136fef
> --- /dev/null
> +++ b/tests/generic/623
> @@ -0,0 +1,109 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +
> +#
> +# FSQA Test No. 623
> +#
> +# AIO/DIO stress test
> +# Run random AIO/DIO activity on an file system with unwritten regions
> +#
> +# This test verifies that the an unwritten extent is properly marked
> +# as written after writing into it.
> +#
> +# There was a hard-to-hit bug which would occasionally trigger with
> +# ext4 for which this test was a reproducer.  This has been fixed
> +# after moving ext4 to use iomap for Direct I/O's, although as of this
> +# writing, there are still some occasional failures on ext4 when block
> +# size < page size.
> +#
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +fio_config=$tmp.fio
> +status=1	# failure is the default!
> +trap "rm -f $tmp.*; exit \$status" 0 1 2 3 15
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +
> +# real QA test starts here
> +_supported_fs generic
> +_require_test
> +_require_scratch
> +_require_odirect
> +_require_block_device $SCRATCH_DEV
> +
> +NUM_JOBS=$((4*LOAD_FACTOR))
> +BLK_DEV_SIZE=`blockdev --getsz $SCRATCH_DEV`
> +FILE_SIZE=$(((BLK_DEV_SIZE * 512) * 3 / 4))
> +
> +max_file_size=$((5 * 1024 * 1024 * 1024))
> +if [ $max_file_size -lt $FILE_SIZE ]; then
> +	FILE_SIZE=$max_file_size
> +fi
> +SIZE=$((FILE_SIZE / 2))
> +
> +cat >$fio_config <<EOF
> +###########
> +# $seq test fio activity
> +# Filenames derived from jobsname and jobid like follows:
> +# ${JOB_NAME}.${JOB_ID}.${ITERATION_ID}
> +[global]
> +ioengine=libaio
> +bs=128k
> +directory=${SCRATCH_MNT}
> +filesize=${FILE_SIZE}
> +size=${SIZE}
> +iodepth=$((128*$LOAD_FACTOR))
> +fallocate=native
> +
> +# Perform direct aio and verify data
> +# This test case should check use-after-free issues
> +[aio-dio-verifier]
> +numjobs=1
> +verify=crc32c-intel
> +verify_fatal=1
> +verify_dump=1
> +verify_backlog=1024
> +verify_async=4
> +direct=1
> +blocksize_range=4100k-8200k
> +blockalign=4k
> +rw=randwrite
> +filename=test-file
> +
> +EOF
> +
> +rm -f $seqres.full
> +
> +_require_fio $fio_config
> +_require_xfs_io_command "falloc"
> +
> +_workout()
> +{
> +	echo ""
> +	echo "Run fio with random aio-dio pattern"
> +	echo ""
> +	cat $fio_config >>  $seqres.full
> +	run_check $FIO_PROG $fio_config
> +}
> +
> +_scratch_mkfs >> $seqres.full 2>&1
> +_scratch_mount
> +
> +if ! _workout; then
> +	_scratch_unmount 2>/dev/null
> +	exit
> +fi
> +
> +if ! _scratch_unmount; then
> +	echo "failed to umount"
> +	status=1
> +	exit
> +fi
> +status=0
> +exit
> diff --git a/tests/generic/623.out b/tests/generic/623.out
> new file mode 100644
> index 00000000..e10c7fd9
> --- /dev/null
> +++ b/tests/generic/623.out
> @@ -0,0 +1,4 @@
> +QA output created by 623
> +
> +Run fio with random aio-dio pattern
> +
> diff --git a/tests/generic/group b/tests/generic/group
> index b10fdea4..24f53ed7 100644
> --- a/tests/generic/group
> +++ b/tests/generic/group
> @@ -625,3 +625,4 @@
>  620 auto mount quick
>  621 auto quick encrypt
>  622 auto shutdown metadata atime
> +623 aio rw stress
> -- 
> 2.30.0
>
Darrick J. Wong March 1, 2021, 5:14 p.m. UTC | #2
On Wed, Feb 10, 2021 at 03:58:18PM -0500, Theodore Ts'o wrote:
> This test verifies that the an unwritten extent is properly marked as
> written after writing into it.
> 
> There was a hard-to-hit bug which would occasionally trigger with ext4
> for which this test was a reproducer.  This has been fixed after
> moving ext4 to use iomap for Direct I/O's, although as of this
> writing, there are still some occasional failures on ext4 when block
> size < page size.

Are there still failures?

> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
> ---
>  tests/generic/623     | 109 ++++++++++++++++++++++++++++++++++++++++++
>  tests/generic/623.out |   4 ++
>  tests/generic/group   |   1 +
>  3 files changed, 114 insertions(+)
>  create mode 100755 tests/generic/623
>  create mode 100644 tests/generic/623.out
> 
> diff --git a/tests/generic/623 b/tests/generic/623
> new file mode 100755
> index 00000000..74136fef
> --- /dev/null
> +++ b/tests/generic/623
> @@ -0,0 +1,109 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +
> +#
> +# FSQA Test No. 623
> +#
> +# AIO/DIO stress test
> +# Run random AIO/DIO activity on an file system with unwritten regions
> +#
> +# This test verifies that the an unwritten extent is properly marked
> +# as written after writing into it.
> +#
> +# There was a hard-to-hit bug which would occasionally trigger with
> +# ext4 for which this test was a reproducer.  This has been fixed
> +# after moving ext4 to use iomap for Direct I/O's, although as of this
> +# writing, there are still some occasional failures on ext4 when block
> +# size < page size.
> +#
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +fio_config=$tmp.fio
> +status=1	# failure is the default!
> +trap "rm -f $tmp.*; exit \$status" 0 1 2 3 15
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +
> +# real QA test starts here
> +_supported_fs generic
> +_require_test
> +_require_scratch
> +_require_odirect
> +_require_block_device $SCRATCH_DEV
> +
> +NUM_JOBS=$((4*LOAD_FACTOR))
> +BLK_DEV_SIZE=`blockdev --getsz $SCRATCH_DEV`
> +FILE_SIZE=$(((BLK_DEV_SIZE * 512) * 3 / 4))
> +
> +max_file_size=$((5 * 1024 * 1024 * 1024))
> +if [ $max_file_size -lt $FILE_SIZE ]; then
> +	FILE_SIZE=$max_file_size
> +fi
> +SIZE=$((FILE_SIZE / 2))
> +
> +cat >$fio_config <<EOF
> +###########
> +# $seq test fio activity
> +# Filenames derived from jobsname and jobid like follows:
> +# ${JOB_NAME}.${JOB_ID}.${ITERATION_ID}
> +[global]
> +ioengine=libaio
> +bs=128k
> +directory=${SCRATCH_MNT}
> +filesize=${FILE_SIZE}
> +size=${SIZE}
> +iodepth=$((128*$LOAD_FACTOR))
> +fallocate=native
> +
> +# Perform direct aio and verify data
> +# This test case should check use-after-free issues
> +[aio-dio-verifier]
> +numjobs=1
> +verify=crc32c-intel
> +verify_fatal=1
> +verify_dump=1
> +verify_backlog=1024
> +verify_async=4
> +direct=1
> +blocksize_range=4100k-8200k
> +blockalign=4k
> +rw=randwrite
> +filename=test-file
> +
> +EOF
> +
> +rm -f $seqres.full
> +
> +_require_fio $fio_config
> +_require_xfs_io_command "falloc"
> +
> +_workout()
> +{
> +	echo ""
> +	echo "Run fio with random aio-dio pattern"
> +	echo ""
> +	cat $fio_config >>  $seqres.full
> +	run_check $FIO_PROG $fio_config
> +}
> +
> +_scratch_mkfs >> $seqres.full 2>&1
> +_scratch_mount
> +
> +if ! _workout; then

If _workout fails, will something be printed to the golden output file
so that the failure is obvious?

If so,
Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D

> +	_scratch_unmount 2>/dev/null
> +	exit
> +fi
> +
> +if ! _scratch_unmount; then
> +	echo "failed to umount"
> +	status=1
> +	exit
> +fi
> +status=0
> +exit
> diff --git a/tests/generic/623.out b/tests/generic/623.out
> new file mode 100644
> index 00000000..e10c7fd9
> --- /dev/null
> +++ b/tests/generic/623.out
> @@ -0,0 +1,4 @@
> +QA output created by 623
> +
> +Run fio with random aio-dio pattern
> +
> diff --git a/tests/generic/group b/tests/generic/group
> index b10fdea4..24f53ed7 100644
> --- a/tests/generic/group
> +++ b/tests/generic/group
> @@ -625,3 +625,4 @@
>  620 auto mount quick
>  621 auto quick encrypt
>  622 auto shutdown metadata atime
> +623 aio rw stress
> -- 
> 2.30.0
>
Theodore Ts'o March 1, 2021, 6:04 p.m. UTC | #3
On Mon, Mar 01, 2021 at 09:14:40AM -0800, Darrick J. Wong wrote:
> On Wed, Feb 10, 2021 at 03:58:18PM -0500, Theodore Ts'o wrote:
> > This test verifies that the an unwritten extent is properly marked as
> > written after writing into it.
> > 
> > There was a hard-to-hit bug which would occasionally trigger with ext4
> > for which this test was a reproducer.  This has been fixed after
> > moving ext4 to use iomap for Direct I/O's, although as of this
> > writing, there are still some occasional failures on ext4 when block
> > size < page size.
> 
> Are there still failures?

Yes.  It's on Ritesh's todo list, but I don't believe it's bubbled up
to the top of his list.

The failure rate is substantially less than what I had been seeing
with 4k block sizes before the move to iomap, and in practice it was
hard to hit in production.  We noticed on Google data center workloads
(but even then it was impossible to reliably repro before I found this
fio recipe); as far as I know no one else had reported seeing this
problem.

So in summary, as of this writing, on 1k and 2k block sizes, this test
is still occasionally failing for ext4 (but not xfs, where it reliably
passes).  On 4k block sizes on x86 with ext4, this test is still
failing in on 5.4 and earler kernels, unless someone has managed to
backport DIO iomap support for ext4 into an enterprise linux kernel.

					- Ted
Darrick J. Wong March 2, 2021, 3:33 a.m. UTC | #4
On Tue, Mar 02, 2021 at 11:20:06AM +0800, Su Yue wrote:
> On Tue, Mar 2, 2021 at 2:10 AM Theodore Ts'o <tytso@mit.edu> wrote:
> >
> > On Mon, Mar 01, 2021 at 09:14:40AM -0800, Darrick J. Wong wrote:
> > > On Wed, Feb 10, 2021 at 03:58:18PM -0500, Theodore Ts'o wrote:
> > > > This test verifies that the an unwritten extent is properly marked as
> > > > written after writing into it.
> > > >
> > > > There was a hard-to-hit bug which would occasionally trigger with ext4
> > > > for which this test was a reproducer.  This has been fixed after
> > > > moving ext4 to use iomap for Direct I/O's, although as of this
> > > > writing, there are still some occasional failures on ext4 when block
> > > > size < page size.
> > >
> > > Are there still failures?
> >
> > Yes.  It's on Ritesh's todo list, but I don't believe it's bubbled up
> > to the top of his list.
> >
> > The failure rate is substantially less than what I had been seeing
> > with 4k block sizes before the move to iomap, and in practice it was
> > hard to hit in production.  We noticed on Google data center workloads
> > (but even then it was impossible to reliably repro before I found this
> > fio recipe); as far as I know no one else had reported seeing this
> > problem.
> >
> > So in summary, as of this writing, on 1k and 2k block sizes, this test
> > is still occasionally failing for ext4 (but not xfs, where it reliably
> > passes).  On 4k block sizes on x86 with ext4, this test is still
> > failing in on 5.4 and earler kernels, unless someone has managed to
> > backport DIO iomap support for ext4 into an enterprise linux kernel.
> >
> But both xfs and ext4 100% fail in v5.12-rc1 with 4k block size and 4k page
> size on x86_64. The test itself should be okay because btrfs passes.
> xfstests-dev version is 8cbc48b460b6 applied this patch. logs are attached.

Is this generic/623?  That test is new, and I've noticed it'll fail if
you have coredumps enabled because the test tries to induce a SIGBUS.

--D

> 
> # fio --version
> fio-3.25
> 
> #cat local.config
> export TEST_DEV=/dev/mapper/test-1
> export TEST_DIR=/mnt/test
> export SCRATCH_DEV_POOL="/dev/mapper/test-[2-6]"
> export SCRATCH_MNT=/mnt/scratch
> export LOGWRITES_DEV="/dev/mapper/test-7"
> export KEEP_DMESG=yes
> 
> >                                         - Ted
Su Yue March 2, 2021, 4:25 a.m. UTC | #5
On Tue, Mar 2, 2021 at 11:33 AM Darrick J. Wong <djwong@kernel.org> wrote:
>
> On Tue, Mar 02, 2021 at 11:20:06AM +0800, Su Yue wrote:
> > On Tue, Mar 2, 2021 at 2:10 AM Theodore Ts'o <tytso@mit.edu> wrote:
> > >
> > > On Mon, Mar 01, 2021 at 09:14:40AM -0800, Darrick J. Wong wrote:
> > > > On Wed, Feb 10, 2021 at 03:58:18PM -0500, Theodore Ts'o wrote:
> > > > > This test verifies that the an unwritten extent is properly marked as
> > > > > written after writing into it.
> > > > >
> > > > > There was a hard-to-hit bug which would occasionally trigger with ext4
> > > > > for which this test was a reproducer.  This has been fixed after
> > > > > moving ext4 to use iomap for Direct I/O's, although as of this
> > > > > writing, there are still some occasional failures on ext4 when block
> > > > > size < page size.
> > > >
> > > > Are there still failures?
> > >
> > > Yes.  It's on Ritesh's todo list, but I don't believe it's bubbled up
> > > to the top of his list.
> > >
> > > The failure rate is substantially less than what I had been seeing
> > > with 4k block sizes before the move to iomap, and in practice it was
> > > hard to hit in production.  We noticed on Google data center workloads
> > > (but even then it was impossible to reliably repro before I found this
> > > fio recipe); as far as I know no one else had reported seeing this
> > > problem.
> > >
> > > So in summary, as of this writing, on 1k and 2k block sizes, this test
> > > is still occasionally failing for ext4 (but not xfs, where it reliably
> > > passes).  On 4k block sizes on x86 with ext4, this test is still
> > > failing in on 5.4 and earler kernels, unless someone has managed to
> > > backport DIO iomap support for ext4 into an enterprise linux kernel.
> > >
> > But both xfs and ext4 100% fail in v5.12-rc1 with 4k block size and 4k page
> > size on x86_64. The test itself should be okay because btrfs passes.
> > xfstests-dev version is 8cbc48b460b6 applied this patch. logs are attached.
>
> Is this generic/623?  That test is new, and I've noticed it'll fail if
> you have coredumps enabled because the test tries to induce a SIGBUS.
>
Hmm.. I think the "generic/623" you mean is introduced by 011bfb01f7f9
("generic: test
mapped write after shutdown and failed writeback"). The generic/623 which
I mean is the commit 8cbc48b460b6(011bfb01f7f9~1) applied Theodore's
patch "generic: test which tries to exercise AIO/DIO into unwritten space".

--
Su
> --D
>
> >
> > # fio --version
> > fio-3.25
> >
> > #cat local.config
> > export TEST_DEV=/dev/mapper/test-1
> > export TEST_DIR=/mnt/test
> > export SCRATCH_DEV_POOL="/dev/mapper/test-[2-6]"
> > export SCRATCH_MNT=/mnt/scratch
> > export LOGWRITES_DEV="/dev/mapper/test-7"
> > export KEEP_DMESG=yes
> >
> > >                                         - Ted
>
>
>
>
Theodore Ts'o March 3, 2021, 8:02 p.m. UTC | #6
On Tue, Mar 02, 2021 at 11:20:06AM +0800, Su Yue wrote:
> But both xfs and ext4 100% fail in v5.12-rc1 with 4k block size and 4k page
> size on x86_64. The test itself should be okay because btrfs passes.
> xfstests-dev version is 8cbc48b460b6 applied this patch. logs are attached.

Hmm, interesting.  Thanks for the report.  I've been running most of
my tests on a GCE VVM (using gce-xfstests) with a 100GB PD-SSD as the
storage device.  I just tried, and it's passing now reliably on both
4k and 1k for me, both on v5.11 and v5.12-rc1.  (Previously it was
failing about 30% of the time on the ext4/1k test scenario, and was
passing 100% on ext4/4k.)

Given your report, I tried running it using KVM (with a NVME ssd as
the back-end storage device), and it's failing 80% of the time using
the 4k block size.

So previously I had assumed that it was passing consistently on with
ext4/4k, but it looks like that it's very much dependent on the
performance of the TEST_DEV.

					- Ted
Eryu Guan March 7, 2021, 4:24 p.m. UTC | #7
On Wed, Feb 10, 2021 at 03:58:18PM -0500, Theodore Ts'o wrote:
> This test verifies that the an unwritten extent is properly marked as
> written after writing into it.
> 
> There was a hard-to-hit bug which would occasionally trigger with ext4
> for which this test was a reproducer.  This has been fixed after
> moving ext4 to use iomap for Direct I/O's, although as of this
> writing, there are still some occasional failures on ext4 when block
> size < page size.
> 
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>

Sorry for the late review, it was in my to-review list and I just got to
it..

> ---
>  tests/generic/623     | 109 ++++++++++++++++++++++++++++++++++++++++++
>  tests/generic/623.out |   4 ++
>  tests/generic/group   |   1 +
>  3 files changed, 114 insertions(+)
>  create mode 100755 tests/generic/623
>  create mode 100644 tests/generic/623.out
> 
> diff --git a/tests/generic/623 b/tests/generic/623
> new file mode 100755
> index 00000000..74136fef
> --- /dev/null
> +++ b/tests/generic/623
> @@ -0,0 +1,109 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +
> +#
> +# FSQA Test No. 623
> +#
> +# AIO/DIO stress test
> +# Run random AIO/DIO activity on an file system with unwritten regions
> +#
> +# This test verifies that the an unwritten extent is properly marked
> +# as written after writing into it.
> +#
> +# There was a hard-to-hit bug which would occasionally trigger with
> +# ext4 for which this test was a reproducer.  This has been fixed
> +# after moving ext4 to use iomap for Direct I/O's, although as of this
> +# writing, there are still some occasional failures on ext4 when block
> +# size < page size.
> +#
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +fio_config=$tmp.fio
> +status=1	# failure is the default!
> +trap "rm -f $tmp.*; exit \$status" 0 1 2 3 15

Better to trap a _cleanup function, even we only do "rm -f $tmp.*" in it,
so it's consistent to other tests, and it's easier to add more cleanups
in _cleanup() function in the future if needed.

> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +
> +# real QA test starts here
> +_supported_fs generic
> +_require_test
> +_require_scratch
> +_require_odirect

_require_aio

> +_require_block_device $SCRATCH_DEV
> +
> +NUM_JOBS=$((4*LOAD_FACTOR))
> +BLK_DEV_SIZE=`blockdev --getsz $SCRATCH_DEV`
> +FILE_SIZE=$(((BLK_DEV_SIZE * 512) * 3 / 4))
> +
> +max_file_size=$((5 * 1024 * 1024 * 1024))
> +if [ $max_file_size -lt $FILE_SIZE ]; then
> +	FILE_SIZE=$max_file_size
> +fi
> +SIZE=$((FILE_SIZE / 2))
> +
> +cat >$fio_config <<EOF
> +###########
> +# $seq test fio activity
> +# Filenames derived from jobsname and jobid like follows:
> +# ${JOB_NAME}.${JOB_ID}.${ITERATION_ID}
> +[global]
> +ioengine=libaio
> +bs=128k
> +directory=${SCRATCH_MNT}
> +filesize=${FILE_SIZE}
> +size=${SIZE}
> +iodepth=$((128*$LOAD_FACTOR))
> +fallocate=native
> +
> +# Perform direct aio and verify data
> +# This test case should check use-after-free issues
> +[aio-dio-verifier]
> +numjobs=1
> +verify=crc32c-intel
> +verify_fatal=1
> +verify_dump=1
> +verify_backlog=1024
> +verify_async=4
> +direct=1
> +blocksize_range=4100k-8200k
> +blockalign=4k
> +rw=randwrite
> +filename=test-file
> +
> +EOF
> +
> +rm -f $seqres.full
> +
> +_require_fio $fio_config
> +_require_xfs_io_command "falloc"
> +
> +_workout()

There's no need to add the leading "_" to local function, it's reserved
to common functions.

> +{
> +	echo ""
> +	echo "Run fio with random aio-dio pattern"
> +	echo ""
> +	cat $fio_config >>  $seqres.full
> +	run_check $FIO_PROG $fio_config

run_check is not recommanded and should be deprecated (maybe I should
send a patch to document it in comment), as it hides failure in
$seqres.full and exits if command returns non-zero.

Just call $FIO_PROG command directly and check return value if
necessary.

> +}
> +
> +_scratch_mkfs >> $seqres.full 2>&1
> +_scratch_mount
> +
> +if ! _workout; then
> +	_scratch_unmount 2>/dev/null
> +	exit
> +fi
> +
> +if ! _scratch_unmount; then
> +	echo "failed to umount"
> +	status=1
> +	exit
> +fi

Is above _scratch_unmount check really needed? The test harness would
umount $SCRATCH_DEV after test anyway.

Thanks,
Eryu

> +status=0
> +exit
> diff --git a/tests/generic/623.out b/tests/generic/623.out
> new file mode 100644
> index 00000000..e10c7fd9
> --- /dev/null
> +++ b/tests/generic/623.out
> @@ -0,0 +1,4 @@
> +QA output created by 623
> +
> +Run fio with random aio-dio pattern
> +
> diff --git a/tests/generic/group b/tests/generic/group
> index b10fdea4..24f53ed7 100644
> --- a/tests/generic/group
> +++ b/tests/generic/group
> @@ -625,3 +625,4 @@
>  620 auto mount quick
>  621 auto quick encrypt
>  622 auto shutdown metadata atime
> +623 aio rw stress
> -- 
> 2.30.0
Theodore Ts'o March 7, 2021, 11:11 p.m. UTC | #8
On Mon, Mar 08, 2021 at 12:24:33AM +0800, Eryu Guan wrote:
> > +trap "rm -f $tmp.*; exit \$status" 0 1 2 3 15
> 
> Better to trap a _cleanup function, even we only do "rm -f $tmp.*" in it,
> so it's consistent to other tests, and it's easier to add more cleanups
> in _cleanup() function in the future if needed.

Done.  I had based this test on generic/299 and generic/300, and a lot
of your comments are applicable to them as well.  I can send a cleanup
patch to fix up those patches as well..

> > +_require_odirect
> 
> _require_aio

Hmmm, generic/299 and generic/300 are missing _require_aio, while
doing async I/O.  I'm a bit surprised this hasn't caused problems for
other file systems.

> > +
> > +_workout()
> 
> There's no need to add the leading "_" to local function, it's reserved
> to common functions.

Done.  (Actually, if we're not unmounting $SCRATCH, then we really
don't need a workout local function at all.)

> > +	run_check $FIO_PROG $fio_config
> 
> run_check is not recommanded and should be deprecated (maybe I should
> send a patch to document it in comment), as it hides failure in
> $seqres.full and exits if command returns non-zero.
> 
> Just call $FIO_PROG command directly and check return value if
> necessary.

Thanks for suggesting dropping the run_check.  I found a problem in
the fio receipe which was causing a FIO warning that I had been
missing.

BTW, all but one of the generic are still using run_check, and in the
one exception, generic/095, which uses --output=$seqres.full which is
causing us to lose all of the output of the earlier commands which
redirected their outputs to $seqres.full.  So there's clearly a
"target rich environment" in terms of test clean up opportunities.

> > +if ! _scratch_unmount; then
> > +	echo "failed to umount"
> > +	status=1
> > +	exit
> > +fi
> 
> Is above _scratch_unmount check really needed? The test harness would
> umount $SCRATCH_DEV after test anyway.

Done.

Thanks for the review,

				- Ted
diff mbox series

Patch

diff --git a/tests/generic/623 b/tests/generic/623
new file mode 100755
index 00000000..74136fef
--- /dev/null
+++ b/tests/generic/623
@@ -0,0 +1,109 @@ 
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# FSQA Test No. 623
+#
+# AIO/DIO stress test
+# Run random AIO/DIO activity on an file system with unwritten regions
+#
+# This test verifies that the an unwritten extent is properly marked
+# as written after writing into it.
+#
+# There was a hard-to-hit bug which would occasionally trigger with
+# ext4 for which this test was a reproducer.  This has been fixed
+# after moving ext4 to use iomap for Direct I/O's, although as of this
+# writing, there are still some occasional failures on ext4 when block
+# size < page size.
+#
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+fio_config=$tmp.fio
+status=1	# failure is the default!
+trap "rm -f $tmp.*; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# real QA test starts here
+_supported_fs generic
+_require_test
+_require_scratch
+_require_odirect
+_require_block_device $SCRATCH_DEV
+
+NUM_JOBS=$((4*LOAD_FACTOR))
+BLK_DEV_SIZE=`blockdev --getsz $SCRATCH_DEV`
+FILE_SIZE=$(((BLK_DEV_SIZE * 512) * 3 / 4))
+
+max_file_size=$((5 * 1024 * 1024 * 1024))
+if [ $max_file_size -lt $FILE_SIZE ]; then
+	FILE_SIZE=$max_file_size
+fi
+SIZE=$((FILE_SIZE / 2))
+
+cat >$fio_config <<EOF
+###########
+# $seq test fio activity
+# Filenames derived from jobsname and jobid like follows:
+# ${JOB_NAME}.${JOB_ID}.${ITERATION_ID}
+[global]
+ioengine=libaio
+bs=128k
+directory=${SCRATCH_MNT}
+filesize=${FILE_SIZE}
+size=${SIZE}
+iodepth=$((128*$LOAD_FACTOR))
+fallocate=native
+
+# Perform direct aio and verify data
+# This test case should check use-after-free issues
+[aio-dio-verifier]
+numjobs=1
+verify=crc32c-intel
+verify_fatal=1
+verify_dump=1
+verify_backlog=1024
+verify_async=4
+direct=1
+blocksize_range=4100k-8200k
+blockalign=4k
+rw=randwrite
+filename=test-file
+
+EOF
+
+rm -f $seqres.full
+
+_require_fio $fio_config
+_require_xfs_io_command "falloc"
+
+_workout()
+{
+	echo ""
+	echo "Run fio with random aio-dio pattern"
+	echo ""
+	cat $fio_config >>  $seqres.full
+	run_check $FIO_PROG $fio_config
+}
+
+_scratch_mkfs >> $seqres.full 2>&1
+_scratch_mount
+
+if ! _workout; then
+	_scratch_unmount 2>/dev/null
+	exit
+fi
+
+if ! _scratch_unmount; then
+	echo "failed to umount"
+	status=1
+	exit
+fi
+status=0
+exit
diff --git a/tests/generic/623.out b/tests/generic/623.out
new file mode 100644
index 00000000..e10c7fd9
--- /dev/null
+++ b/tests/generic/623.out
@@ -0,0 +1,4 @@ 
+QA output created by 623
+
+Run fio with random aio-dio pattern
+
diff --git a/tests/generic/group b/tests/generic/group
index b10fdea4..24f53ed7 100644
--- a/tests/generic/group
+++ b/tests/generic/group
@@ -625,3 +625,4 @@ 
 620 auto mount quick
 621 auto quick encrypt
 622 auto shutdown metadata atime
+623 aio rw stress