@@ -138,6 +138,10 @@ _dmerror_load_error_table()
suspend_opt="$*"
fi
+ # If the full environment is set up, configure ourselves for shutdown
+ type _prepare_for_eio_shutdown &>/dev/null && \
+ _prepare_for_eio_shutdown $DMERROR_DEV
+
# Suspend the scratch device before the log and realtime devices so
# that the kernel can freeze and flush the filesystem if the caller
# wanted a freeze.
@@ -44,6 +44,7 @@ _start_fail_scratch_dev()
{
echo "Force SCRATCH_DEV device failure"
+ _prepare_for_eio_shutdown $SCRATCH_DEV
_bdev_fail_make_request $SCRATCH_DEV 1
[ "$USE_EXTERNAL" = yes -a ! -z "$SCRATCH_LOGDEV" ] && \
_bdev_fail_make_request $SCRATCH_LOGDEV 1
@@ -4372,6 +4372,20 @@ _check_dmesg()
fi
}
+# Apply whatever filesystem-specific configuration is needed before a test
+# provokes a filesystem shutdown through unexpected IO errors hit while
+# updating metadata.
+# $1 is the fs device, e.g. $SCRATCH_DEV.
+_prepare_for_eio_shutdown()
+{
+	local fsdev="$1"
+
+	# Only xfs has a preparation hook; every other $FSTYP is a no-op.
+	[ "$FSTYP" = "xfs" ] || return 0
+
+	_xfs_prepare_for_eio_shutdown "$fsdev"
+}
+
# capture the kmemleak report
_capture_kmemleak()
{
@@ -4634,7 +4648,7 @@ run_fsx()
#
# Usage example:
# _require_fs_sysfs error/fail_at_unmount
-_require_fs_sysfs()
+_has_fs_sysfs()
{
local attr=$1
local dname
@@ -4650,9 +4664,18 @@ _require_fs_sysfs()
_fail "Usage: _require_fs_sysfs <sysfs_attr_path>"
fi
- if [ ! -e /sys/fs/${FSTYP}/${dname}/${attr} ];then
- _notrun "This test requires /sys/fs/${FSTYP}/${dname}/${attr}"
- fi
+ test -e /sys/fs/${FSTYP}/${dname}/${attr}
+}
+
+# Call _notrun unless the sysfs entry /sys/fs/$FSTYP/DEV/$ATTR exists
+_require_fs_sysfs()
+{
+	if ! _has_fs_sysfs "$@"; then
+		local attr=$1
+		local dname=$(_short_dev $TEST_DEV)
+
+		_notrun "This test requires /sys/fs/${FSTYP}/${dname}/${attr}"
+	fi
}
_require_statx()
@@ -800,6 +800,35 @@ _scratch_xfs_unmount_dirty()
_scratch_unmount
}
+# Prepare a mounted filesystem for an IO error shutdown test by disabling retry
+# for metadata writes.  This prevents a (rare) log livelock when:
+#
+# - The log has given out all available grant space, preventing any new
+#   writers from tripping over IO errors (and shutting down the fs/log),
+# - All log buffers were written to disk, and
+# - The log tail is pinned because the AIL keeps hitting EIO trying to write
+#   committed changes back into the filesystem.
+#
+# Real users might want the default behavior of the AIL retrying writes forever
+# but for testing purposes we don't want to wait.
+#
+# The sole parameter should be the filesystem data device, e.g. $SCRATCH_DEV.
+_xfs_prepare_for_eio_shutdown()
+{
+	local dev="$1"
+	local ctl ctlfile="error/fail_at_unmount"
+	# Probe $dev's own sysfs dir; _has_fs_sysfs takes no device argument
+	local sysdir="/sys/fs/${FSTYP}/$(_short_dev "$dev")"
+
+	# Don't retry any writes during the (presumably) post-shutdown unmount
+	test -e "$sysdir/$ctlfile" && _set_fs_sysfs_attr "$dev" "$ctlfile" 1
+
+	# Disable retry of metadata writes that fail with EIO
+	for ctl in max_retries retry_timeout_seconds; do
+		ctlfile="error/metadata/EIO/$ctl"
+		test -e "$sysdir/$ctlfile" && _set_fs_sysfs_attr "$dev" "$ctlfile" 0
+	done
+}
+
# Skip if we are running an older binary without the stricter input checks.
# Make multiple checks to be sure that there is no regression on the one
# selected feature check, which would skew the result.