diff mbox series

generic: add gc stress test

Message ID 20240415112259.21760-1-hans.holmberg@wdc.com (mailing list archive)
State New
Headers show
Series generic: add gc stress test | expand

Commit Message

Hans Holmberg April 15, 2024, 11:23 a.m. UTC
This test stresses garbage collection for file systems by first filling
up a scratch mount to a specific usage point with files of random size,
then doing overwrites in parallel with deletes to fragment the backing
storage, forcing reclaim.

Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
---

Test results in my setup (kernel 6.8.0-rc4+)
	f2fs on zoned nullblk: pass (77s)
	f2fs on conventional nvme ssd: pass (13s)
	btrfs on zoned nullblk: fails (-ENOSPC)
	btrfs on conventional nvme ssd: fails (-ENOSPC)
	xfs on conventional nvme ssd: pass (8s)

Johannes(cc) is working on the btrfs ENOSPC issue.
	 
 tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
 tests/generic/744.out |   6 ++
 2 files changed, 130 insertions(+)
 create mode 100755 tests/generic/744
 create mode 100644 tests/generic/744.out

Comments

Hans Holmberg April 16, 2024, 9:07 a.m. UTC | #1
+Zorro (doh!)

On 2024-04-15 13:23, Hans Holmberg wrote:
> This test stresses garbage collection for file systems by first filling
> up a scratch mount to a specific usage point with files of random size,
> then doing overwrites in parallel with deletes to fragment the backing
> storage, forcing reclaim.
> 
> Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
> ---
> 
> Test results in my setup (kernel 6.8.0-rc4+)
> 	f2fs on zoned nullblk: pass (77s)
> 	f2fs on conventional nvme ssd: pass (13s)
> 	btrfs on zoned nublk: fails (-ENOSPC)
> 	btrfs on conventional nvme ssd: fails (-ENOSPC)
> 	xfs on conventional nvme ssd: pass (8s)
> 
> Johannes(cc) is working on the btrfs ENOSPC issue.
> 	
>   tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
>   tests/generic/744.out |   6 ++
>   2 files changed, 130 insertions(+)
>   create mode 100755 tests/generic/744
>   create mode 100644 tests/generic/744.out
> 
> diff --git a/tests/generic/744 b/tests/generic/744
> new file mode 100755
> index 000000000000..2c7ab76bf8b1
> --- /dev/null
> +++ b/tests/generic/744
> @@ -0,0 +1,124 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
> +#
> +# FS QA Test No. 744
> +#
> +# Inspired by btrfs/273 and generic/015
> +#
> +# This test stresses garbage collection in file systems
> +# by first filling up a scratch mount to a specific usage point with
> +# files of random size, then doing overwrites in parallel with
> +# deletes to fragment the backing zones, forcing reclaim.
> +
> +. ./common/preamble
> +_begin_fstest auto
> +
> +# real QA test starts here
> +
> +_require_scratch
> +
> +# This test requires specific data space usage, skip if we have compression
> +# enabled.
> +_require_no_compress
> +
> +M=$((1024 * 1024))
> +min_fsz=$((1 * ${M}))
> +max_fsz=$((256 * ${M}))
> +bs=${M}
> +fill_percent=95
> +overwrite_percentage=20
> +seq=0
> +
> +_create_file() {
> +	local file_name=${SCRATCH_MNT}/data_$1
> +	local file_sz=$2
> +	local dd_extra=$3
> +
> +	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
> +		bs=${bs} count=$(( $file_sz / ${bs} )) \
> +		status=none $dd_extra  2>&1
> +
> +	status=$?
> +	if [ $status -ne 0 ]; then
> +		echo "Failed writing $file_name" >>$seqres.full
> +		exit
> +	fi
> +}
> +
> +_total_M() {
> +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
> +	local bs=$(stat -f -c '%S' ${SCRATCH_MNT})
> +	echo $(( ${total} * ${bs} / ${M}))
> +}
> +
> +_used_percent() {
> +	local available=$(stat -f -c '%a' ${SCRATCH_MNT})
> +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
> +	echo $((100 - (100 * ${available}) / ${total} ))
> +}
> +
> +
> +_delete_random_file() {
> +	local to_delete=$(find ${SCRATCH_MNT} -type f | shuf | head -1)
> +	rm $to_delete
> +	sync ${SCRATCH_MNT}
> +}
> +
> +_get_random_fsz() {
> +	local r=$RANDOM
> +	echo $(( ${min_fsz} + (${max_fsz} - ${min_fsz}) * (${r} % 100) / 100 ))
> +}
> +
> +_direct_fillup () {
> +	while [ $(_used_percent) -lt $fill_percent ]; do
> +		local fsz=$(_get_random_fsz)
> +
> +		_create_file $seq $fsz "oflag=direct conv=fsync"
> +		seq=$((${seq} + 1))
> +	done
> +}
> +
> +_mixed_write_delete() {
> +	local dd_extra=$1
> +	local total_M=$(_total_M)
> +	local to_write_M=$(( ${overwrite_percentage} * ${total_M} / 100 ))
> +	local written_M=0
> +
> +	while [ $written_M -lt $to_write_M ]; do
> +		if [ $(_used_percent) -lt $fill_percent ]; then
> +			local fsz=$(_get_random_fsz)
> +
> +			_create_file $seq $fsz "$dd_extra"
> +			written_M=$((${written_M} + ${fsz}/${M}))
> +			seq=$((${seq} + 1))
> +		else
> +			_delete_random_file
> +		fi
> +	done
> +}
> +
> +seed=$RANDOM
> +RANDOM=$seed
> +echo "Running test with seed=$seed" >>$seqres.full
> +
> +_scratch_mkfs_sized $((8 * 1024 * 1024 * 1024)) >>$seqres.full
> +_scratch_mount
> +
> +echo "Starting fillup using direct IO"
> +_direct_fillup
> +
> +echo "Starting mixed write/delete test using direct IO"
> +_mixed_write_delete "oflag=direct"
> +
> +echo "Starting mixed write/delete test using buffered IO"
> +_mixed_write_delete ""
> +
> +echo "Syncing"
> +sync ${SCRATCH_MNT}/*
> +
> +echo "Done, all good"
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/generic/744.out b/tests/generic/744.out
> new file mode 100644
> index 000000000000..b40c2f43108e
> --- /dev/null
> +++ b/tests/generic/744.out
> @@ -0,0 +1,6 @@
> +QA output created by 744
> +Starting fillup using direct IO
> +Starting mixed write/delete test using direct IO
> +Starting mixed write/delete test using buffered IO
> +Syncing
> +Done, all good
Darrick J. Wong April 16, 2024, 6:54 p.m. UTC | #2
On Tue, Apr 16, 2024 at 09:07:43AM +0000, Hans Holmberg wrote:
> +Zorro (doh!)
> 
> On 2024-04-15 13:23, Hans Holmberg wrote:
> > This test stresses garbage collection for file systems by first filling
> > up a scratch mount to a specific usage point with files of random size,
> > then doing overwrites in parallel with deletes to fragment the backing
> > storage, forcing reclaim.
> > 
> > Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
> > ---
> > 
> > Test results in my setup (kernel 6.8.0-rc4+)
> > 	f2fs on zoned nullblk: pass (77s)
> > 	f2fs on conventional nvme ssd: pass (13s)
> > 	btrfs on zoned nublk: fails (-ENOSPC)
> > 	btrfs on conventional nvme ssd: fails (-ENOSPC)
> > 	xfs on conventional nvme ssd: pass (8s)
> > 
> > Johannes(cc) is working on the btrfs ENOSPC issue.
> > 	
> >   tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
> >   tests/generic/744.out |   6 ++
> >   2 files changed, 130 insertions(+)
> >   create mode 100755 tests/generic/744
> >   create mode 100644 tests/generic/744.out
> > 
> > diff --git a/tests/generic/744 b/tests/generic/744
> > new file mode 100755
> > index 000000000000..2c7ab76bf8b1
> > --- /dev/null
> > +++ b/tests/generic/744
> > @@ -0,0 +1,124 @@
> > +#! /bin/bash
> > +# SPDX-License-Identifier: GPL-2.0
> > +# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
> > +#
> > +# FS QA Test No. 744
> > +#
> > +# Inspired by btrfs/273 and generic/015
> > +#
> > +# This test stresses garbage collection in file systems
> > +# by first filling up a scratch mount to a specific usage point with
> > +# files of random size, then doing overwrites in parallel with
> > +# deletes to fragment the backing zones, forcing reclaim.
> > +
> > +. ./common/preamble
> > +_begin_fstest auto
> > +
> > +# real QA test starts here
> > +
> > +_require_scratch
> > +
> > +# This test requires specific data space usage, skip if we have compression
> > +# enabled.
> > +_require_no_compress
> > +
> > +M=$((1024 * 1024))
> > +min_fsz=$((1 * ${M}))
> > +max_fsz=$((256 * ${M}))
> > +bs=${M}
> > +fill_percent=95
> > +overwrite_percentage=20
> > +seq=0
> > +
> > +_create_file() {
> > +	local file_name=${SCRATCH_MNT}/data_$1
> > +	local file_sz=$2
> > +	local dd_extra=$3
> > +
> > +	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
> > +		bs=${bs} count=$(( $file_sz / ${bs} )) \
> > +		status=none $dd_extra  2>&1
> > +
> > +	status=$?
> > +	if [ $status -ne 0 ]; then
> > +		echo "Failed writing $file_name" >>$seqres.full
> > +		exit
> > +	fi
> > +}

I wonder, is there a particular reason for doing all these file
operations with shell code instead of using fsstress to create and
delete files to fill the fs and stress all the zone-gc code?  This test
reminds me a lot of generic/476 but with more fork()ing.

--D

> > +
> > +_total_M() {
> > +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
> > +	local bs=$(stat -f -c '%S' ${SCRATCH_MNT})
> > +	echo $(( ${total} * ${bs} / ${M}))
> > +}
> > +
> > +_used_percent() {
> > +	local available=$(stat -f -c '%a' ${SCRATCH_MNT})
> > +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
> > +	echo $((100 - (100 * ${available}) / ${total} ))
> > +}
> > +
> > +
> > +_delete_random_file() {
> > +	local to_delete=$(find ${SCRATCH_MNT} -type f | shuf | head -1)
> > +	rm $to_delete
> > +	sync ${SCRATCH_MNT}
> > +}
> > +
> > +_get_random_fsz() {
> > +	local r=$RANDOM
> > +	echo $(( ${min_fsz} + (${max_fsz} - ${min_fsz}) * (${r} % 100) / 100 ))
> > +}
> > +
> > +_direct_fillup () {
> > +	while [ $(_used_percent) -lt $fill_percent ]; do
> > +		local fsz=$(_get_random_fsz)
> > +
> > +		_create_file $seq $fsz "oflag=direct conv=fsync"
> > +		seq=$((${seq} + 1))
> > +	done
> > +}
> > +
> > +_mixed_write_delete() {
> > +	local dd_extra=$1
> > +	local total_M=$(_total_M)
> > +	local to_write_M=$(( ${overwrite_percentage} * ${total_M} / 100 ))
> > +	local written_M=0
> > +
> > +	while [ $written_M -lt $to_write_M ]; do
> > +		if [ $(_used_percent) -lt $fill_percent ]; then
> > +			local fsz=$(_get_random_fsz)
> > +
> > +			_create_file $seq $fsz "$dd_extra"
> > +			written_M=$((${written_M} + ${fsz}/${M}))
> > +			seq=$((${seq} + 1))
> > +		else
> > +			_delete_random_file
> > +		fi
> > +	done
> > +}
> > +
> > +seed=$RANDOM
> > +RANDOM=$seed
> > +echo "Running test with seed=$seed" >>$seqres.full
> > +
> > +_scratch_mkfs_sized $((8 * 1024 * 1024 * 1024)) >>$seqres.full
> > +_scratch_mount
> > +
> > +echo "Starting fillup using direct IO"
> > +_direct_fillup
> > +
> > +echo "Starting mixed write/delete test using direct IO"
> > +_mixed_write_delete "oflag=direct"
> > +
> > +echo "Starting mixed write/delete test using buffered IO"
> > +_mixed_write_delete ""
> > +
> > +echo "Syncing"
> > +sync ${SCRATCH_MNT}/*
> > +
> > +echo "Done, all good"
> > +
> > +# success, all done
> > +status=0
> > +exit
> > diff --git a/tests/generic/744.out b/tests/generic/744.out
> > new file mode 100644
> > index 000000000000..b40c2f43108e
> > --- /dev/null
> > +++ b/tests/generic/744.out
> > @@ -0,0 +1,6 @@
> > +QA output created by 744
> > +Starting fillup using direct IO
> > +Starting mixed write/delete test using direct IO
> > +Starting mixed write/delete test using buffered IO
> > +Syncing
> > +Done, all good
>
Zorro Lang April 17, 2024, 12:43 p.m. UTC | #3
On Tue, Apr 16, 2024 at 11:54:37AM -0700, Darrick J. Wong wrote:
> On Tue, Apr 16, 2024 at 09:07:43AM +0000, Hans Holmberg wrote:
> > +Zorro (doh!)
> > 
> > On 2024-04-15 13:23, Hans Holmberg wrote:
> > > This test stresses garbage collection for file systems by first filling
> > > up a scratch mount to a specific usage point with files of random size,
> > > then doing overwrites in parallel with deletes to fragment the backing
> > > storage, forcing reclaim.
> > > 
> > > Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
> > > ---
> > > 
> > > Test results in my setup (kernel 6.8.0-rc4+)
> > > 	f2fs on zoned nullblk: pass (77s)
> > > 	f2fs on conventional nvme ssd: pass (13s)
> > > 	btrfs on zoned nublk: fails (-ENOSPC)
> > > 	btrfs on conventional nvme ssd: fails (-ENOSPC)
> > > 	xfs on conventional nvme ssd: pass (8s)
> > > 
> > > Johannes(cc) is working on the btrfs ENOSPC issue.
> > > 	
> > >   tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
> > >   tests/generic/744.out |   6 ++
> > >   2 files changed, 130 insertions(+)
> > >   create mode 100755 tests/generic/744
> > >   create mode 100644 tests/generic/744.out
> > > 
> > > diff --git a/tests/generic/744 b/tests/generic/744
> > > new file mode 100755
> > > index 000000000000..2c7ab76bf8b1
> > > --- /dev/null
> > > +++ b/tests/generic/744
> > > @@ -0,0 +1,124 @@
> > > +#! /bin/bash
> > > +# SPDX-License-Identifier: GPL-2.0
> > > +# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
> > > +#
> > > +# FS QA Test No. 744
> > > +#
> > > +# Inspired by btrfs/273 and generic/015
> > > +#
> > > +# This test stresses garbage collection in file systems
> > > +# by first filling up a scratch mount to a specific usage point with
> > > +# files of random size, then doing overwrites in parallel with
> > > +# deletes to fragment the backing zones, forcing reclaim.
> > > +
> > > +. ./common/preamble
> > > +_begin_fstest auto
> > > +
> > > +# real QA test starts here
> > > +
> > > +_require_scratch
> > > +
> > > +# This test requires specific data space usage, skip if we have compression
> > > +# enabled.
> > > +_require_no_compress
> > > +
> > > +M=$((1024 * 1024))
> > > +min_fsz=$((1 * ${M}))
> > > +max_fsz=$((256 * ${M}))
> > > +bs=${M}
> > > +fill_percent=95
> > > +overwrite_percentage=20
> > > +seq=0
> > > +
> > > +_create_file() {
> > > +	local file_name=${SCRATCH_MNT}/data_$1
> > > +	local file_sz=$2
> > > +	local dd_extra=$3
> > > +
> > > +	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
> > > +		bs=${bs} count=$(( $file_sz / ${bs} )) \
> > > +		status=none $dd_extra  2>&1
> > > +
> > > +	status=$?
> > > +	if [ $status -ne 0 ]; then
> > > +		echo "Failed writing $file_name" >>$seqres.full
> > > +		exit
> > > +	fi
> > > +}
> 
> I wonder, is there a particular reason for doing all these file
> operations with shell code instead of using fsstress to create and
> delete files to fill the fs and stress all the zone-gc code?  This test
> reminds me a lot of generic/476 but with more fork()ing.

/me has the same confusion. Can this test cover more things than using
fsstress (to do reclaim test) ? Or does it uncover some known bugs which
other cases can't?

Thanks,
Zorro

> 
> --D
> 
> > > +
> > > +_total_M() {
> > > +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
> > > +	local bs=$(stat -f -c '%S' ${SCRATCH_MNT})
> > > +	echo $(( ${total} * ${bs} / ${M}))
> > > +}
> > > +
> > > +_used_percent() {
> > > +	local available=$(stat -f -c '%a' ${SCRATCH_MNT})
> > > +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
> > > +	echo $((100 - (100 * ${available}) / ${total} ))
> > > +}
> > > +
> > > +
> > > +_delete_random_file() {
> > > +	local to_delete=$(find ${SCRATCH_MNT} -type f | shuf | head -1)
> > > +	rm $to_delete
> > > +	sync ${SCRATCH_MNT}
> > > +}
> > > +
> > > +_get_random_fsz() {
> > > +	local r=$RANDOM
> > > +	echo $(( ${min_fsz} + (${max_fsz} - ${min_fsz}) * (${r} % 100) / 100 ))
> > > +}
> > > +
> > > +_direct_fillup () {
> > > +	while [ $(_used_percent) -lt $fill_percent ]; do
> > > +		local fsz=$(_get_random_fsz)
> > > +
> > > +		_create_file $seq $fsz "oflag=direct conv=fsync"
> > > +		seq=$((${seq} + 1))
> > > +	done
> > > +}
> > > +
> > > +_mixed_write_delete() {
> > > +	local dd_extra=$1
> > > +	local total_M=$(_total_M)
> > > +	local to_write_M=$(( ${overwrite_percentage} * ${total_M} / 100 ))
> > > +	local written_M=0
> > > +
> > > +	while [ $written_M -lt $to_write_M ]; do
> > > +		if [ $(_used_percent) -lt $fill_percent ]; then
> > > +			local fsz=$(_get_random_fsz)
> > > +
> > > +			_create_file $seq $fsz "$dd_extra"
> > > +			written_M=$((${written_M} + ${fsz}/${M}))
> > > +			seq=$((${seq} + 1))
> > > +		else
> > > +			_delete_random_file
> > > +		fi
> > > +	done
> > > +}
> > > +
> > > +seed=$RANDOM
> > > +RANDOM=$seed
> > > +echo "Running test with seed=$seed" >>$seqres.full
> > > +
> > > +_scratch_mkfs_sized $((8 * 1024 * 1024 * 1024)) >>$seqres.full
> > > +_scratch_mount
> > > +
> > > +echo "Starting fillup using direct IO"
> > > +_direct_fillup
> > > +
> > > +echo "Starting mixed write/delete test using direct IO"
> > > +_mixed_write_delete "oflag=direct"
> > > +
> > > +echo "Starting mixed write/delete test using buffered IO"
> > > +_mixed_write_delete ""
> > > +
> > > +echo "Syncing"
> > > +sync ${SCRATCH_MNT}/*
> > > +
> > > +echo "Done, all good"
> > > +
> > > +# success, all done
> > > +status=0
> > > +exit
> > > diff --git a/tests/generic/744.out b/tests/generic/744.out
> > > new file mode 100644
> > > index 000000000000..b40c2f43108e
> > > --- /dev/null
> > > +++ b/tests/generic/744.out
> > > @@ -0,0 +1,6 @@
> > > +QA output created by 744
> > > +Starting fillup using direct IO
> > > +Starting mixed write/delete test using direct IO
> > > +Starting mixed write/delete test using buffered IO
> > > +Syncing
> > > +Done, all good
> > 
>
Hans Holmberg April 17, 2024, 1:21 p.m. UTC | #4
On 2024-04-17 14:43, Zorro Lang wrote:
> On Tue, Apr 16, 2024 at 11:54:37AM -0700, Darrick J. Wong wrote:
>> On Tue, Apr 16, 2024 at 09:07:43AM +0000, Hans Holmberg wrote:
>>> +Zorro (doh!)
>>>
>>> On 2024-04-15 13:23, Hans Holmberg wrote:
>>>> This test stresses garbage collection for file systems by first filling
>>>> up a scratch mount to a specific usage point with files of random size,
>>>> then doing overwrites in parallel with deletes to fragment the backing
>>>> storage, forcing reclaim.
>>>>
>>>> Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
>>>> ---
>>>>
>>>> Test results in my setup (kernel 6.8.0-rc4+)
>>>> 	f2fs on zoned nullblk: pass (77s)
>>>> 	f2fs on conventional nvme ssd: pass (13s)
>>>> 	btrfs on zoned nublk: fails (-ENOSPC)
>>>> 	btrfs on conventional nvme ssd: fails (-ENOSPC)
>>>> 	xfs on conventional nvme ssd: pass (8s)
>>>>
>>>> Johannes(cc) is working on the btrfs ENOSPC issue.
>>>> 	
>>>>    tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
>>>>    tests/generic/744.out |   6 ++
>>>>    2 files changed, 130 insertions(+)
>>>>    create mode 100755 tests/generic/744
>>>>    create mode 100644 tests/generic/744.out
>>>>
>>>> diff --git a/tests/generic/744 b/tests/generic/744
>>>> new file mode 100755
>>>> index 000000000000..2c7ab76bf8b1
>>>> --- /dev/null
>>>> +++ b/tests/generic/744
>>>> @@ -0,0 +1,124 @@
>>>> +#! /bin/bash
>>>> +# SPDX-License-Identifier: GPL-2.0
>>>> +# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
>>>> +#
>>>> +# FS QA Test No. 744
>>>> +#
>>>> +# Inspired by btrfs/273 and generic/015
>>>> +#
>>>> +# This test stresses garbage collection in file systems
>>>> +# by first filling up a scratch mount to a specific usage point with
>>>> +# files of random size, then doing overwrites in parallel with
>>>> +# deletes to fragment the backing zones, forcing reclaim.
>>>> +
>>>> +. ./common/preamble
>>>> +_begin_fstest auto
>>>> +
>>>> +# real QA test starts here
>>>> +
>>>> +_require_scratch
>>>> +
>>>> +# This test requires specific data space usage, skip if we have compression
>>>> +# enabled.
>>>> +_require_no_compress
>>>> +
>>>> +M=$((1024 * 1024))
>>>> +min_fsz=$((1 * ${M}))
>>>> +max_fsz=$((256 * ${M}))
>>>> +bs=${M}
>>>> +fill_percent=95
>>>> +overwrite_percentage=20
>>>> +seq=0
>>>> +
>>>> +_create_file() {
>>>> +	local file_name=${SCRATCH_MNT}/data_$1
>>>> +	local file_sz=$2
>>>> +	local dd_extra=$3
>>>> +
>>>> +	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
>>>> +		bs=${bs} count=$(( $file_sz / ${bs} )) \
>>>> +		status=none $dd_extra  2>&1
>>>> +
>>>> +	status=$?
>>>> +	if [ $status -ne 0 ]; then
>>>> +		echo "Failed writing $file_name" >>$seqres.full
>>>> +		exit
>>>> +	fi
>>>> +}
>>
>> I wonder, is there a particular reason for doing all these file
>> operations with shell code instead of using fsstress to create and
>> delete files to fill the fs and stress all the zone-gc code?  This test
>> reminds me a lot of generic/476 but with more fork()ing.
> 
> /me has the same confusion. Can this test cover more things than using
> fsstress (to do reclaim test) ? Or does it uncover some known bugs which
> other cases can't?

ah, adding some more background is probably useful:

I've been using this test to stress the crap out of the zoned xfs garbage
collection / write throttling implementation for zoned rt subvolumes
support in xfs and it has found a number of issues during implementation
that i did not reproduce by other means.

I think it also has wider applicability as it triggers bugs in btrfs. 
f2fs passes without issues, but probably benefits from a quick smoke gc 
test as well. Discussed this with Bart and Daeho (now in cc) before 
submitting.

Using fsstress would be cool, but as far as I can tell it cannot
be told to operate at a specific file system usage point, which
is a key thing for this test.

Thanks,
Hans

> 
> Thanks,
> Zorro
> 
>>
>> --D
>>
>>>> +
>>>> +_total_M() {
>>>> +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
>>>> +	local bs=$(stat -f -c '%S' ${SCRATCH_MNT})
>>>> +	echo $(( ${total} * ${bs} / ${M}))
>>>> +}
>>>> +
>>>> +_used_percent() {
>>>> +	local available=$(stat -f -c '%a' ${SCRATCH_MNT})
>>>> +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
>>>> +	echo $((100 - (100 * ${available}) / ${total} ))
>>>> +}
>>>> +
>>>> +
>>>> +_delete_random_file() {
>>>> +	local to_delete=$(find ${SCRATCH_MNT} -type f | shuf | head -1)
>>>> +	rm $to_delete
>>>> +	sync ${SCRATCH_MNT}
>>>> +}
>>>> +
>>>> +_get_random_fsz() {
>>>> +	local r=$RANDOM
>>>> +	echo $(( ${min_fsz} + (${max_fsz} - ${min_fsz}) * (${r} % 100) / 100 ))
>>>> +}
>>>> +
>>>> +_direct_fillup () {
>>>> +	while [ $(_used_percent) -lt $fill_percent ]; do
>>>> +		local fsz=$(_get_random_fsz)
>>>> +
>>>> +		_create_file $seq $fsz "oflag=direct conv=fsync"
>>>> +		seq=$((${seq} + 1))
>>>> +	done
>>>> +}
>>>> +
>>>> +_mixed_write_delete() {
>>>> +	local dd_extra=$1
>>>> +	local total_M=$(_total_M)
>>>> +	local to_write_M=$(( ${overwrite_percentage} * ${total_M} / 100 ))
>>>> +	local written_M=0
>>>> +
>>>> +	while [ $written_M -lt $to_write_M ]; do
>>>> +		if [ $(_used_percent) -lt $fill_percent ]; then
>>>> +			local fsz=$(_get_random_fsz)
>>>> +
>>>> +			_create_file $seq $fsz "$dd_extra"
>>>> +			written_M=$((${written_M} + ${fsz}/${M}))
>>>> +			seq=$((${seq} + 1))
>>>> +		else
>>>> +			_delete_random_file
>>>> +		fi
>>>> +	done
>>>> +}
>>>> +
>>>> +seed=$RANDOM
>>>> +RANDOM=$seed
>>>> +echo "Running test with seed=$seed" >>$seqres.full
>>>> +
>>>> +_scratch_mkfs_sized $((8 * 1024 * 1024 * 1024)) >>$seqres.full
>>>> +_scratch_mount
>>>> +
>>>> +echo "Starting fillup using direct IO"
>>>> +_direct_fillup
>>>> +
>>>> +echo "Starting mixed write/delete test using direct IO"
>>>> +_mixed_write_delete "oflag=direct"
>>>> +
>>>> +echo "Starting mixed write/delete test using buffered IO"
>>>> +_mixed_write_delete ""
>>>> +
>>>> +echo "Syncing"
>>>> +sync ${SCRATCH_MNT}/*
>>>> +
>>>> +echo "Done, all good"
>>>> +
>>>> +# success, all done
>>>> +status=0
>>>> +exit
>>>> diff --git a/tests/generic/744.out b/tests/generic/744.out
>>>> new file mode 100644
>>>> index 000000000000..b40c2f43108e
>>>> --- /dev/null
>>>> +++ b/tests/generic/744.out
>>>> @@ -0,0 +1,6 @@
>>>> +QA output created by 744
>>>> +Starting fillup using direct IO
>>>> +Starting mixed write/delete test using direct IO
>>>> +Starting mixed write/delete test using buffered IO
>>>> +Syncing
>>>> +Done, all good
>>>
>>
> 
>
Zorro Lang April 17, 2024, 2:06 p.m. UTC | #5
On Wed, Apr 17, 2024 at 01:21:39PM +0000, Hans Holmberg wrote:
> On 2024-04-17 14:43, Zorro Lang wrote:
> > On Tue, Apr 16, 2024 at 11:54:37AM -0700, Darrick J. Wong wrote:
> >> On Tue, Apr 16, 2024 at 09:07:43AM +0000, Hans Holmberg wrote:
> >>> +Zorro (doh!)
> >>>
> >>> On 2024-04-15 13:23, Hans Holmberg wrote:
> >>>> This test stresses garbage collection for file systems by first filling
> >>>> up a scratch mount to a specific usage point with files of random size,
> >>>> then doing overwrites in parallel with deletes to fragment the backing
> >>>> storage, forcing reclaim.
> >>>>
> >>>> Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
> >>>> ---
> >>>>
> >>>> Test results in my setup (kernel 6.8.0-rc4+)
> >>>> 	f2fs on zoned nullblk: pass (77s)
> >>>> 	f2fs on conventional nvme ssd: pass (13s)
> >>>> 	btrfs on zoned nublk: fails (-ENOSPC)
> >>>> 	btrfs on conventional nvme ssd: fails (-ENOSPC)
> >>>> 	xfs on conventional nvme ssd: pass (8s)
> >>>>
> >>>> Johannes(cc) is working on the btrfs ENOSPC issue.
> >>>> 	
> >>>>    tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
> >>>>    tests/generic/744.out |   6 ++
> >>>>    2 files changed, 130 insertions(+)
> >>>>    create mode 100755 tests/generic/744
> >>>>    create mode 100644 tests/generic/744.out
> >>>>
> >>>> diff --git a/tests/generic/744 b/tests/generic/744
> >>>> new file mode 100755
> >>>> index 000000000000..2c7ab76bf8b1
> >>>> --- /dev/null
> >>>> +++ b/tests/generic/744
> >>>> @@ -0,0 +1,124 @@
> >>>> +#! /bin/bash
> >>>> +# SPDX-License-Identifier: GPL-2.0
> >>>> +# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
> >>>> +#
> >>>> +# FS QA Test No. 744
> >>>> +#
> >>>> +# Inspired by btrfs/273 and generic/015
> >>>> +#
> >>>> +# This test stresses garbage collection in file systems
> >>>> +# by first filling up a scratch mount to a specific usage point with
> >>>> +# files of random size, then doing overwrites in parallel with
> >>>> +# deletes to fragment the backing zones, forcing reclaim.
> >>>> +
> >>>> +. ./common/preamble
> >>>> +_begin_fstest auto
> >>>> +
> >>>> +# real QA test starts here
> >>>> +
> >>>> +_require_scratch
> >>>> +
> >>>> +# This test requires specific data space usage, skip if we have compression
> >>>> +# enabled.
> >>>> +_require_no_compress
> >>>> +
> >>>> +M=$((1024 * 1024))
> >>>> +min_fsz=$((1 * ${M}))
> >>>> +max_fsz=$((256 * ${M}))
> >>>> +bs=${M}
> >>>> +fill_percent=95
> >>>> +overwrite_percentage=20
> >>>> +seq=0
> >>>> +
> >>>> +_create_file() {
> >>>> +	local file_name=${SCRATCH_MNT}/data_$1
> >>>> +	local file_sz=$2
> >>>> +	local dd_extra=$3
> >>>> +
> >>>> +	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
> >>>> +		bs=${bs} count=$(( $file_sz / ${bs} )) \
> >>>> +		status=none $dd_extra  2>&1
> >>>> +
> >>>> +	status=$?
> >>>> +	if [ $status -ne 0 ]; then
> >>>> +		echo "Failed writing $file_name" >>$seqres.full
> >>>> +		exit
> >>>> +	fi
> >>>> +}
> >>
> >> I wonder, is there a particular reason for doing all these file
> >> operations with shell code instead of using fsstress to create and
> >> delete files to fill the fs and stress all the zone-gc code?  This test
> >> reminds me a lot of generic/476 but with more fork()ing.
> > 
> > /me has the same confusion. Can this test cover more things than using
> > fsstress (to do reclaim test) ? Or does it uncover some known bugs which
> > other cases can't?
> 
> ah, adding some more background is probably useful:
> 
> I've been using this test to stress the crap out the zoned xfs garbage
> collection / write throttling implementation for zoned rt subvolumes
> support in xfs and it has found a number of issues during implementation
> that i did not reproduce by other means.
> 
> I think it also has wider applicability as it triggers bugs in btrfs. 
> f2fs passes without issues, but probably benefits from a quick smoke gc 
> test as well. Discussed this with Bart and Daeho (now in cc) before 
> submitting.
> 
> Using fsstress would be cool, but as far as I can tell it cannot
> be told to operate at a specific file system usage point, which
> is a key thing for this test.

As a random test case, if this case can be transformed to use fsstress to cover
the same issues, that would be nice.

But if as a regression test case, it has its particular test coverage, and the
issue it covered can't be reproduced by fsstress way, then let's work on this
bash script one.

Any thoughts?

Thanks,
Zorro

> 
> Thanks,
> Hans
> 
> > 
> > Thanks,
> > Zorro
> > 
> >>
> >> --D
> >>
> >>>> +
> >>>> +_total_M() {
> >>>> +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
> >>>> +	local bs=$(stat -f -c '%S' ${SCRATCH_MNT})
> >>>> +	echo $(( ${total} * ${bs} / ${M}))
> >>>> +}
> >>>> +
> >>>> +_used_percent() {
> >>>> +	local available=$(stat -f -c '%a' ${SCRATCH_MNT})
> >>>> +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
> >>>> +	echo $((100 - (100 * ${available}) / ${total} ))
> >>>> +}
> >>>> +
> >>>> +
> >>>> +_delete_random_file() {
> >>>> +	local to_delete=$(find ${SCRATCH_MNT} -type f | shuf | head -1)
> >>>> +	rm $to_delete
> >>>> +	sync ${SCRATCH_MNT}
> >>>> +}
> >>>> +
> >>>> +_get_random_fsz() {
> >>>> +	local r=$RANDOM
> >>>> +	echo $(( ${min_fsz} + (${max_fsz} - ${min_fsz}) * (${r} % 100) / 100 ))
> >>>> +}
> >>>> +
> >>>> +_direct_fillup () {
> >>>> +	while [ $(_used_percent) -lt $fill_percent ]; do
> >>>> +		local fsz=$(_get_random_fsz)
> >>>> +
> >>>> +		_create_file $seq $fsz "oflag=direct conv=fsync"
> >>>> +		seq=$((${seq} + 1))
> >>>> +	done
> >>>> +}
> >>>> +
> >>>> +_mixed_write_delete() {
> >>>> +	local dd_extra=$1
> >>>> +	local total_M=$(_total_M)
> >>>> +	local to_write_M=$(( ${overwrite_percentage} * ${total_M} / 100 ))
> >>>> +	local written_M=0
> >>>> +
> >>>> +	while [ $written_M -lt $to_write_M ]; do
> >>>> +		if [ $(_used_percent) -lt $fill_percent ]; then
> >>>> +			local fsz=$(_get_random_fsz)
> >>>> +
> >>>> +			_create_file $seq $fsz "$dd_extra"
> >>>> +			written_M=$((${written_M} + ${fsz}/${M}))
> >>>> +			seq=$((${seq} + 1))
> >>>> +		else
> >>>> +			_delete_random_file
> >>>> +		fi
> >>>> +	done
> >>>> +}
> >>>> +
> >>>> +seed=$RANDOM
> >>>> +RANDOM=$seed
> >>>> +echo "Running test with seed=$seed" >>$seqres.full
> >>>> +
> >>>> +_scratch_mkfs_sized $((8 * 1024 * 1024 * 1024)) >>$seqres.full
> >>>> +_scratch_mount
> >>>> +
> >>>> +echo "Starting fillup using direct IO"
> >>>> +_direct_fillup
> >>>> +
> >>>> +echo "Starting mixed write/delete test using direct IO"
> >>>> +_mixed_write_delete "oflag=direct"
> >>>> +
> >>>> +echo "Starting mixed write/delete test using buffered IO"
> >>>> +_mixed_write_delete ""
> >>>> +
> >>>> +echo "Syncing"
> >>>> +sync ${SCRATCH_MNT}/*
> >>>> +
> >>>> +echo "Done, all good"
> >>>> +
> >>>> +# success, all done
> >>>> +status=0
> >>>> +exit
> >>>> diff --git a/tests/generic/744.out b/tests/generic/744.out
> >>>> new file mode 100644
> >>>> index 000000000000..b40c2f43108e
> >>>> --- /dev/null
> >>>> +++ b/tests/generic/744.out
> >>>> @@ -0,0 +1,6 @@
> >>>> +QA output created by 744
> >>>> +Starting fillup using direct IO
> >>>> +Starting mixed write/delete test using direct IO
> >>>> +Starting mixed write/delete test using buffered IO
> >>>> +Syncing
> >>>> +Done, all good
> >>>
> >>
> > 
> > 
>
Hans Holmberg April 17, 2024, 2:45 p.m. UTC | #6
On 2024-04-17 16:07, Zorro Lang wrote:
> On Wed, Apr 17, 2024 at 01:21:39PM +0000, Hans Holmberg wrote:
>> On 2024-04-17 14:43, Zorro Lang wrote:
>>> On Tue, Apr 16, 2024 at 11:54:37AM -0700, Darrick J. Wong wrote:
>>>> On Tue, Apr 16, 2024 at 09:07:43AM +0000, Hans Holmberg wrote:
>>>>> +Zorro (doh!)
>>>>>
>>>>> On 2024-04-15 13:23, Hans Holmberg wrote:
>>>>>> This test stresses garbage collection for file systems by first filling
>>>>>> up a scratch mount to a specific usage point with files of random size,
>>>>>> then doing overwrites in parallel with deletes to fragment the backing
>>>>>> storage, forcing reclaim.
>>>>>>
>>>>>> Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
>>>>>> ---
>>>>>>
>>>>>> Test results in my setup (kernel 6.8.0-rc4+)
>>>>>> 	f2fs on zoned nullblk: pass (77s)
>>>>>> 	f2fs on conventional nvme ssd: pass (13s)
>>>>>> 	btrfs on zoned nublk: fails (-ENOSPC)
>>>>>> 	btrfs on conventional nvme ssd: fails (-ENOSPC)
>>>>>> 	xfs on conventional nvme ssd: pass (8s)
>>>>>>
>>>>>> Johannes(cc) is working on the btrfs ENOSPC issue.
>>>>>> 	
>>>>>>     tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
>>>>>>     tests/generic/744.out |   6 ++
>>>>>>     2 files changed, 130 insertions(+)
>>>>>>     create mode 100755 tests/generic/744
>>>>>>     create mode 100644 tests/generic/744.out
>>>>>>
>>>>>> diff --git a/tests/generic/744 b/tests/generic/744
>>>>>> new file mode 100755
>>>>>> index 000000000000..2c7ab76bf8b1
>>>>>> --- /dev/null
>>>>>> +++ b/tests/generic/744
>>>>>> @@ -0,0 +1,124 @@
>>>>>> +#! /bin/bash
>>>>>> +# SPDX-License-Identifier: GPL-2.0
>>>>>> +# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
>>>>>> +#
>>>>>> +# FS QA Test No. 744
>>>>>> +#
>>>>>> +# Inspired by btrfs/273 and generic/015
>>>>>> +#
>>>>>> +# This test stresses garbage collection in file systems
>>>>>> +# by first filling up a scratch mount to a specific usage point with
>>>>>> +# files of random size, then doing overwrites in parallel with
>>>>>> +# deletes to fragment the backing zones, forcing reclaim.
>>>>>> +
>>>>>> +. ./common/preamble
>>>>>> +_begin_fstest auto
>>>>>> +
>>>>>> +# real QA test starts here
>>>>>> +
>>>>>> +_require_scratch
>>>>>> +
>>>>>> +# This test requires specific data space usage, skip if we have compression
>>>>>> +# enabled.
>>>>>> +_require_no_compress
>>>>>> +
>>>>>> +M=$((1024 * 1024))
>>>>>> +min_fsz=$((1 * ${M}))
>>>>>> +max_fsz=$((256 * ${M}))
>>>>>> +bs=${M}
>>>>>> +fill_percent=95
>>>>>> +overwrite_percentage=20
>>>>>> +seq=0
>>>>>> +
>>>>>> +_create_file() {
>>>>>> +	local file_name=${SCRATCH_MNT}/data_$1
>>>>>> +	local file_sz=$2
>>>>>> +	local dd_extra=$3
>>>>>> +
>>>>>> +	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
>>>>>> +		bs=${bs} count=$(( $file_sz / ${bs} )) \
>>>>>> +		status=none $dd_extra  2>&1
>>>>>> +
>>>>>> +	status=$?
>>>>>> +	if [ $status -ne 0 ]; then
>>>>>> +		echo "Failed writing $file_name" >>$seqres.full
>>>>>> +		exit
>>>>>> +	fi
>>>>>> +}
>>>>
>>>> I wonder, is there a particular reason for doing all these file
>>>> operations with shell code instead of using fsstress to create and
>>>> delete files to fill the fs and stress all the zone-gc code?  This test
>>>> reminds me a lot of generic/476 but with more fork()ing.
>>>
>>> /me has the same confusion. Can this test cover more things than using
>>> fsstress (to do reclaim test) ? Or does it uncover some known bugs which
>>> other cases can't?
>>
>> ah, adding some more background is probably useful:
>>
>> I've been using this test to stress the crap out the zoned xfs garbage
>> collection / write throttling implementation for zoned rt subvolumes
>> support in xfs and it has found a number of issues during implementation
>> that i did not reproduce by other means.
>>
>> I think it also has wider applicability as it triggers bugs in btrfs.
>> f2fs passes without issues, but probably benefits from a quick smoke gc
>> test as well. Discussed this with Bart and Daeho (now in cc) before
>> submitting.
>>
>> Using fsstress would be cool, but as far as I can tell it cannot
>> be told to operate at a specific file system usage point, which
>> is a key thing for this test.
> 
> As a random test case, if this case can be transformed to use fsstress to cover
> same issues, that would be nice.
> 
> But if as a regression test case, it has its particular test coverage, and the
> issue it covered can't be reproduced by fsstress way, then let's work on this
> bash script one.
> 
> Any thoughts?

Yeah, I think bash is preferable for this particular test case.
Bash also makes it easy to hack for people's private uses.

I use longer versions of this test (increasing overwrite_percentage)
for weekly testing.

If we need fsstress for reproducing any future gc bug we can add
what's missing to it then.

Does that make sense?

Thanks,
Hans

> 
> Thanks,
> Zorro
> 
>>
>> Thanks,
>> Hans
>>
>>>
>>> Thanks,
>>> Zorro
>>>
>>>>
>>>> --D
>>>>
>>>>>> +
>>>>>> +_total_M() {
>>>>>> +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
>>>>>> +	local bs=$(stat -f -c '%S' ${SCRATCH_MNT})
>>>>>> +	echo $(( ${total} * ${bs} / ${M}))
>>>>>> +}
>>>>>> +
>>>>>> +_used_percent() {
>>>>>> +	local available=$(stat -f -c '%a' ${SCRATCH_MNT})
>>>>>> +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
>>>>>> +	echo $((100 - (100 * ${available}) / ${total} ))
>>>>>> +}
>>>>>> +
>>>>>> +
>>>>>> +_delete_random_file() {
>>>>>> +	local to_delete=$(find ${SCRATCH_MNT} -type f | shuf | head -1)
>>>>>> +	rm $to_delete
>>>>>> +	sync ${SCRATCH_MNT}
>>>>>> +}
>>>>>> +
>>>>>> +_get_random_fsz() {
>>>>>> +	local r=$RANDOM
>>>>>> +	echo $(( ${min_fsz} + (${max_fsz} - ${min_fsz}) * (${r} % 100) / 100 ))
>>>>>> +}
>>>>>> +
>>>>>> +_direct_fillup () {
>>>>>> +	while [ $(_used_percent) -lt $fill_percent ]; do
>>>>>> +		local fsz=$(_get_random_fsz)
>>>>>> +
>>>>>> +		_create_file $seq $fsz "oflag=direct conv=fsync"
>>>>>> +		seq=$((${seq} + 1))
>>>>>> +	done
>>>>>> +}
>>>>>> +
>>>>>> +_mixed_write_delete() {
>>>>>> +	local dd_extra=$1
>>>>>> +	local total_M=$(_total_M)
>>>>>> +	local to_write_M=$(( ${overwrite_percentage} * ${total_M} / 100 ))
>>>>>> +	local written_M=0
>>>>>> +
>>>>>> +	while [ $written_M -lt $to_write_M ]; do
>>>>>> +		if [ $(_used_percent) -lt $fill_percent ]; then
>>>>>> +			local fsz=$(_get_random_fsz)
>>>>>> +
>>>>>> +			_create_file $seq $fsz "$dd_extra"
>>>>>> +			written_M=$((${written_M} + ${fsz}/${M}))
>>>>>> +			seq=$((${seq} + 1))
>>>>>> +		else
>>>>>> +			_delete_random_file
>>>>>> +		fi
>>>>>> +	done
>>>>>> +}
>>>>>> +
>>>>>> +seed=$RANDOM
>>>>>> +RANDOM=$seed
>>>>>> +echo "Running test with seed=$seed" >>$seqres.full
>>>>>> +
>>>>>> +_scratch_mkfs_sized $((8 * 1024 * 1024 * 1024)) >>$seqres.full
>>>>>> +_scratch_mount
>>>>>> +
>>>>>> +echo "Starting fillup using direct IO"
>>>>>> +_direct_fillup
>>>>>> +
>>>>>> +echo "Starting mixed write/delete test using direct IO"
>>>>>> +_mixed_write_delete "oflag=direct"
>>>>>> +
>>>>>> +echo "Starting mixed write/delete test using buffered IO"
>>>>>> +_mixed_write_delete ""
>>>>>> +
>>>>>> +echo "Syncing"
>>>>>> +sync ${SCRATCH_MNT}/*
>>>>>> +
>>>>>> +echo "Done, all good"
>>>>>> +
>>>>>> +# success, all done
>>>>>> +status=0
>>>>>> +exit
>>>>>> diff --git a/tests/generic/744.out b/tests/generic/744.out
>>>>>> new file mode 100644
>>>>>> index 000000000000..b40c2f43108e
>>>>>> --- /dev/null
>>>>>> +++ b/tests/generic/744.out
>>>>>> @@ -0,0 +1,6 @@
>>>>>> +QA output created by 744
>>>>>> +Starting fillup using direct IO
>>>>>> +Starting mixed write/delete test using direct IO
>>>>>> +Starting mixed write/delete test using buffered IO
>>>>>> +Syncing
>>>>>> +Done, all good
>>>>>
>>>>
>>>
>>>
>>
>
diff mbox series

Patch

diff --git a/tests/generic/744 b/tests/generic/744
new file mode 100755
index 000000000000..2c7ab76bf8b1
--- /dev/null
+++ b/tests/generic/744
@@ -0,0 +1,124 @@ 
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
+#
+# FS QA Test No. 744
+#
+# Inspired by btrfs/273 and generic/015
+#
+# This test stresses garbage collection in file systems
+# by first filling up a scratch mount to a specific usage point with
+# files of random size, then doing overwrites in parallel with
+# deletes to fragment the backing zones, forcing reclaim.
+
+. ./common/preamble
+_begin_fstest auto
+
+# real QA test starts here
+
+_require_scratch
+
+# This test requires specific data space usage, skip if we have compression
+# enabled.
+_require_no_compress
+
+M=$((1024 * 1024))
+min_fsz=$((1 * ${M}))
+max_fsz=$((256 * ${M}))
+bs=${M}
+fill_percent=95
+overwrite_percentage=20
+seq=0
+
# Create (or overwrite) ${SCRATCH_MNT}/data_$1 of a given size with dd.
#
# $1 - numeric suffix for the file name
# $2 - file size in bytes (should be a multiple of $bs)
# $3 - extra dd arguments (e.g. "oflag=direct conv=fsync"); intentionally
#      left unquoted below so it word-splits into separate dd operands
#
# dd diagnostics go to stdout (2>&1) so an unexpected write failure also
# breaks the golden output.
_create_file() {
	local file_name=${SCRATCH_MNT}/data_$1
	local file_sz=$2
	local dd_extra=$3
	local ret

	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
		bs=${bs} count=$(( $file_sz / ${bs} )) \
		status=none $dd_extra  2>&1

	# Capture the exit code in a *local*: assigning to "status" would
	# clobber the global status variable the fstests exit trap reports,
	# turning a later premature exit into a false PASS.
	ret=$?
	if [ $ret -ne 0 ]; then
		echo "Failed writing $file_name" >>$seqres.full
		exit
	fi
}
+
# Report the scratch file system's total capacity in MiB.
_total_M() {
	local nr_blocks
	local block_sz

	nr_blocks=$(stat -f -c '%b' ${SCRATCH_MNT})
	block_sz=$(stat -f -c '%S' ${SCRATCH_MNT})
	echo $(( nr_blocks * block_sz / M ))
}
+
# Report the scratch file system's space usage as an integer percentage
# (based on blocks available to unprivileged users).
_used_percent() {
	local avail
	local total

	avail=$(stat -f -c '%a' ${SCRATCH_MNT})
	total=$(stat -f -c '%b' ${SCRATCH_MNT})
	echo $(( 100 - (100 * avail) / total ))
}
+
+
# Delete one regular file picked at random from the scratch mount, then
# sync so the freed space actually becomes reclaimable by the fs.
_delete_random_file() {
	local to_delete

	to_delete=$(find ${SCRATCH_MNT} -type f | shuf -n 1)
	# Guard the empty pick: a bare "rm" with no operand would print an
	# error on stderr and break the golden output.
	if [ -n "$to_delete" ]; then
		rm -- "$to_delete"
	fi
	sync ${SCRATCH_MNT}
}
+
# Pick a pseudo-random file size in [min_fsz, max_fsz), quantized to
# 1% steps of the range. Consumes exactly one $RANDOM draw so runs are
# reproducible for a given seed.
_get_random_fsz() {
	local pct=$(( RANDOM % 100 ))

	echo $(( min_fsz + (max_fsz - min_fsz) * pct / 100 ))
}
+
# Fill the scratch fs with random-sized, direct-IO, fsync'ed files until
# usage reaches $fill_percent.
_direct_fillup () {
	local fsz

	until [ $(_used_percent) -ge $fill_percent ]; do
		fsz=$(_get_random_fsz)
		_create_file $seq $fsz "oflag=direct conv=fsync"
		seq=$(( seq + 1 ))
	done
}
+
# Overwrite $overwrite_percentage of the fs capacity while hovering
# around the target usage point: create new files while usage is below
# $fill_percent, delete random files when at/above it. This fragments
# the backing storage and forces reclaim.
# $1 - extra dd arguments passed through to _create_file
_mixed_write_delete() {
	local dd_extra=$1
	local capacity_M=$(_total_M)
	local goal_M=$(( overwrite_percentage * capacity_M / 100 ))
	local done_M=0
	local fsz

	until [ $done_M -ge $goal_M ]; do
		if [ $(_used_percent) -ge $fill_percent ]; then
			_delete_random_file
			continue
		fi
		fsz=$(_get_random_fsz)
		_create_file $seq $fsz "$dd_extra"
		done_M=$(( done_M + fsz / M ))
		seq=$(( seq + 1 ))
	done
}
+
+seed=$RANDOM
+RANDOM=$seed
+echo "Running test with seed=$seed" >>$seqres.full
+
+_scratch_mkfs_sized $((8 * 1024 * 1024 * 1024)) >>$seqres.full
+_scratch_mount
+
+echo "Starting fillup using direct IO"
+_direct_fillup
+
+echo "Starting mixed write/delete test using direct IO"
+_mixed_write_delete "oflag=direct"
+
+echo "Starting mixed write/delete test using buffered IO"
+_mixed_write_delete ""
+
+echo "Syncing"
+sync ${SCRATCH_MNT}/*
+
+echo "Done, all good"
+
+# success, all done
+status=0
+exit
diff --git a/tests/generic/744.out b/tests/generic/744.out
new file mode 100644
index 000000000000..b40c2f43108e
--- /dev/null
+++ b/tests/generic/744.out
@@ -0,0 +1,6 @@ 
+QA output created by 744
+Starting fillup using direct IO
+Starting mixed write/delete test using direct IO
+Starting mixed write/delete test using buffered IO
+Syncing
+Done, all good