diff mbox series

[1/2] common: provide generic block error injection helper

Message ID 20220704090346.108134-1-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series [1/2] common: provide generic block error injection helper | expand

Commit Message

Christoph Hellwig July 4, 2022, 9:03 a.m. UTC
Various tests have more or less copy and pasted code to enable and
disable block layer "fail make request" error injection.  Move that
to a set of common helpers and use those in the drivers.

btrfs/150 differened from the other two in a few ways, like selecting
a not quite as big number to fail all requests in the small critical
section and clearing a bunch of never set attributes in the failure
injection configuration, but none of those matter for the test
execution.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 common/inject     | 33 +++++++++++++++++++++++++++++++++
 tests/btrfs/088   | 24 +++++-------------------
 tests/btrfs/150   | 27 +++++----------------------
 tests/generic/019 | 44 +++++++++-----------------------------------
 4 files changed, 52 insertions(+), 76 deletions(-)

Comments

Anand Jain July 4, 2022, 3 p.m. UTC | #1
On 04/07/2022 17:03, Christoph Hellwig wrote:
> Various tests have more or less copy and pasted code to enable and
> disable block layer "fail make request" error injection.  Move that
> to a set of common helpers and use those in the drivers.
> 

> btrfs/150 differened from the other two in a few ways, like selecting
> a not quite as big number to fail all requests in the small critical
> section and clearing a bunch of never set attributes in the failure
> injection configuration, but none of those matter for the test
> execution.

Hm. more below.


> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   common/inject     | 33 +++++++++++++++++++++++++++++++++
>   tests/btrfs/088   | 24 +++++-------------------
>   tests/btrfs/150   | 27 +++++----------------------
>   tests/generic/019 | 44 +++++++++-----------------------------------
>   4 files changed, 52 insertions(+), 76 deletions(-)
> 
> diff --git a/common/inject b/common/inject
> index 6b590804..137ff5fd 100644
> --- a/common/inject
> +++ b/common/inject
> @@ -111,3 +111,36 @@ _scratch_inject_error()
>   		_fail "Cannot inject error ${type} value ${value}."
>   	fi
>   }
> +
> +# enable block error injection globally
> +_enable_fail_make_request()
> +{
> +	echo 100 > $DEBUGFS_MNT/fail_make_request/probability

> +	echo 9999999 > $DEBUGFS_MNT/fail_make_request/times

Instead, can we do
    printf %#x -1  > $DEBUGFS_MNT/fail_make_request/times
(as in the documentation).


<snip>

> @@ -25,24 +26,6 @@ _require_fail_make_request
>   _require_scratch_dev_pool 2
>   _scratch_dev_pool_get 2
>   
> -SYSFS_BDEV=`_sysfs_dev $SCRATCH_DEV`
> -enable_io_failure()
> -{
> -	echo 100 > $DEBUGFS_MNT/fail_make_request/probability
> -	echo 1000 > $DEBUGFS_MNT/fail_make_request/times
> -	echo 0 > $DEBUGFS_MNT/fail_make_request/verbose


> -	echo 1 > $DEBUGFS_MNT/fail_make_request/task-filter
Only extra line in btrfs/150 is the above line (or did I miss any)?
Which is deleted in this patch.

Per 'task-filter' documentation
--------------
- /sys/kernel/debug/fail*/task-filter:

         Format: { 'Y' | 'N' }

         A value of 'N' disables filtering by process (default).
         Any positive value limits failures to only processes indicated by
         /proc/<pid>/make-it-fail==1.
--------------

<snip>

> -	enable_io_failure
> -
> +	_enable_fail_make_request
> +	_start_fail_dev $SCRATCH_DEV
>   	result=$(bash -c "
>   	if [ \$((\$\$ % 2)) == 1 ]; then

>   		echo 1 > /proc/\$\$/make-it-fail

  So this won't work now.

Thanks, Anand
Christoph Hellwig July 5, 2022, 7:43 a.m. UTC | #2
On Mon, Jul 04, 2022 at 11:00:57PM +0800, Anand Jain wrote:
>> -	echo 1 > $DEBUGFS_MNT/fail_make_request/task-filter
> Only extra line in btrfs/150 is the above line (or did I miss any)?
> Which is deleted in this patch.

Yes.

>> -	enable_io_failure
>> -
>> +	_enable_fail_make_request
>> +	_start_fail_dev $SCRATCH_DEV
>>   	result=$(bash -c "
>>   	if [ \$((\$\$ % 2)) == 1 ]; then
>
>>   		echo 1 > /proc/\$\$/make-it-fail
>
>  So this won't work now.

Yes.
diff mbox series

Patch

diff --git a/common/inject b/common/inject
index 6b590804..137ff5fd 100644
--- a/common/inject
+++ b/common/inject
@@ -111,3 +111,36 @@  _scratch_inject_error()
 		_fail "Cannot inject error ${type} value ${value}."
 	fi
 }
+
+# enable block error injection globally
+_enable_fail_make_request()
+{
+	echo 100 > $DEBUGFS_MNT/fail_make_request/probability
+	echo 9999999 > $DEBUGFS_MNT/fail_make_request/times
+	echo 0 >  /sys/kernel/debug/fail_make_request/verbose
+}
+
+# disable block error injection globally
+_disable_fail_make_request()
+{
+	echo 0 > $DEBUGFS_MNT/fail_make_request/probability
+	echo 0 > $DEBUGFS_MNT/fail_make_request/times
+}
+
+# make a device always fail
+_start_fail_dev()
+{
+	local sysfs_bdev=`_sysfs_dev $1`
+
+	echo " echo 1 > $sysfs_bdev/make-it-fail" >> $seqres.full
+	echo 1 > $sysfs_bdev/make-it-fail
+}
+
+# stop a device from failing
+_stop_fail_dev()
+{
+	local sysfs_bdev=`_sysfs_dev $1`
+
+	echo " echo 0 > $sysfs_bdev/make-it-fail" >> $seqres.full
+	echo 0 > $sysfs_bdev/make-it-fail
+}
diff --git a/tests/btrfs/088 b/tests/btrfs/088
index d9c5528b..bd02401a 100755
--- a/tests/btrfs/088
+++ b/tests/btrfs/088
@@ -18,29 +18,13 @@  _begin_fstest auto quick metadata
 
 # Import common functions.
 . ./common/filter
+. ./common/inject
 
 # real QA test starts here
 _supported_fs btrfs
 _require_scratch
 _require_fail_make_request
 
-SYSFS_BDEV=`_sysfs_dev $SCRATCH_DEV`
-
-enable_io_failure()
-{
-	echo 100 > $DEBUGFS_MNT/fail_make_request/probability
-	echo 1000 > $DEBUGFS_MNT/fail_make_request/times
-	echo 0 > $DEBUGFS_MNT/fail_make_request/verbose
-	echo 1 > $SYSFS_BDEV/make-it-fail
-}
-
-disable_io_failure()
-{
-	echo 0 > $SYSFS_BDEV/make-it-fail
-	echo 0 > $DEBUGFS_MNT/fail_make_request/probability
-	echo 0 > $DEBUGFS_MNT/fail_make_request/times
-}
-
 # We will abort a btrfs transaction later, which always produces a warning in
 # dmesg. We do not want the test to fail because of this.
 _disable_dmesg_check
@@ -72,12 +56,14 @@  $XFS_IO_PROG -c "pwrite -S 0xbb 512K 1M" $SCRATCH_MNT/foo | _filter_xfs_io
 # perform a discard operation against the pinned exents. This made the fs
 # unmountable because the btree roots that the superblock points at were written
 # in place (by the discard operations).
-enable_io_failure
+_enable_fail_make_request
+_start_fail_dev $SCRATCH_DEV
 
 # This sync will trigger a commit of the current transaction, which will be
 # aborted because IO will fail for metadata extents (btree nodes/leafs).
 sync
-disable_io_failure
+_stop_fail_dev $SCRATCH_DEV
+_disable_fail_make_request
 
 touch $SCRATCH_MNT/abc >>$seqres.full 2>&1 && \
 	echo "Transaction was not aborted, filesystem is not in readonly mode"
diff --git a/tests/btrfs/150 b/tests/btrfs/150
index c5e9c709..cc80cb41 100755
--- a/tests/btrfs/150
+++ b/tests/btrfs/150
@@ -15,6 +15,7 @@  _begin_fstest auto quick dangerous read_repair
 
 # Import common functions.
 . ./common/filter
+. ./common/inject
 
 # real QA test starts here
 
@@ -25,24 +26,6 @@  _require_fail_make_request
 _require_scratch_dev_pool 2
 _scratch_dev_pool_get 2
 
-SYSFS_BDEV=`_sysfs_dev $SCRATCH_DEV`
-enable_io_failure()
-{
-	echo 100 > $DEBUGFS_MNT/fail_make_request/probability
-	echo 1000 > $DEBUGFS_MNT/fail_make_request/times
-	echo 0 > $DEBUGFS_MNT/fail_make_request/verbose
-	echo 1 > $DEBUGFS_MNT/fail_make_request/task-filter
-	echo 1 > $SYSFS_BDEV/make-it-fail
-}
-
-disable_io_failure()
-{
-	echo 0 > $DEBUGFS_MNT/fail_make_request/probability
-	echo 0 > $DEBUGFS_MNT/fail_make_request/times
-	echo 0 > $DEBUGFS_MNT/fail_make_request/task-filter
-	echo 0 > $SYSFS_BDEV/make-it-fail
-}
-
 _check_minimal_fs_size $(( 1024 * 1024 * 1024 ))
 _scratch_pool_mkfs "-d raid1 -b 1G" >> $seqres.full 2>&1
 
@@ -59,15 +42,15 @@  while [[ -z $result ]]; do
 	# invalidate the page cache
 	$XFS_IO_PROG -f -c "fadvise -d 0 8K" $SCRATCH_MNT/foobar
 
-	enable_io_failure
-
+	_enable_fail_make_request
+	_start_fail_dev $SCRATCH_DEV
 	result=$(bash -c "
 	if [ \$((\$\$ % 2)) == 1 ]; then
 		echo 1 > /proc/\$\$/make-it-fail
 		exec $XFS_IO_PROG -c \"pread 0 8K\" \$SCRATCH_MNT/foobar
 	fi")
-
-	disable_io_failure
+	_stop_fail_dev $SCRATCH_DEV
+	_disable_fail_make_request
 done
 
 _scratch_dev_pool_put
diff --git a/tests/generic/019 b/tests/generic/019
index 45c91624..3cfcb7c5 100755
--- a/tests/generic/019
+++ b/tests/generic/019
@@ -14,47 +14,18 @@  fio_config=$tmp.fio
 
 # Import common functions.
 . ./common/filter
+. ./common/inject
 _supported_fs generic
 _require_scratch
 _require_block_device $SCRATCH_DEV
 _require_fail_make_request
 
-SYSFS_BDEV=`_sysfs_dev $SCRATCH_DEV`
-
-allow_fail_make_request()
-{
-    echo "Allow global fail_make_request feature"
-    echo 100 > $DEBUGFS_MNT/fail_make_request/probability
-    echo 9999999 > $DEBUGFS_MNT/fail_make_request/times
-    echo 0 >  /sys/kernel/debug/fail_make_request/verbose
-}
-
-disallow_fail_make_request()
-{
-    echo "Disallow global fail_make_request feature"
-    echo 0 > $DEBUGFS_MNT/fail_make_request/probability
-    echo 0 > $DEBUGFS_MNT/fail_make_request/times
-}
-
-start_fail_scratch_dev()
-{
-    echo "Force SCRATCH_DEV device failure"
-    echo " echo 1 > $SYSFS_BDEV/make-it-fail" >> $seqres.full
-    echo 1 > $SYSFS_BDEV/make-it-fail
-}
-
-stop_fail_scratch_dev()
-{
-    echo "Make SCRATCH_DEV device operable again"
-    echo " echo 0 > $SYSFS_BDEV/make-it-fail" >> $seqres.full
-    echo 0 > $SYSFS_BDEV/make-it-fail
-}
-
 # Override the default cleanup function.
 _cleanup()
 {
 	kill $fs_pid $fio_pid &> /dev/null
-	disallow_fail_make_request
+	echo "Disallow global fail_make_request feature"
+	_disable_fail_make_request
 	cd /
 	rm -r -f $tmp.*
 }
@@ -129,7 +100,8 @@  _workout()
 
 	# Let's it work for awhile, and force device failure
 	sleep $RUN_TIME
-	start_fail_scratch_dev
+	echo "Force SCRATCH_DEV device failure"
+	_start_fail_dev $SCRATCH_DEV
 	# After device turns in to failed state filesystem may yet not know about
 	# that so buffered write(2) may succeed, but any integrity operations
 	# such as (sync, fsync, fdatasync, direct-io) should fail.
@@ -147,7 +119,8 @@  _workout()
 	run_check _scratch_unmount
 	# Once filesystem was umounted no one is able to write to block device
 	# It is now safe to bring device back to normal state
-	stop_fail_scratch_dev
+	echo "Make SCRATCH_DEV device operable again"
+	_stop_fail_dev $SCRATCH_DEV
 
 	# In order to check that filesystem is able to recover journal on mount(2)
 	# perform mount/umount, after that all errors should be fixed
@@ -159,7 +132,8 @@  _workout()
 
 _scratch_mkfs >> $seqres.full 2>&1 || _fail "mkfs failed"
 _scratch_mount
-allow_fail_make_request
+echo "Allow global fail_make_request feature"
+_enable_fail_make_request
 _workout
 status=$?
 exit