diff mbox series

[18/24] common: check xfs health after doing an online scrub

Message ID 167243878140.730387.8820397468231187961.stgit@magnolia (mailing list archive)
State New, archived
Headers show
Series fstests: improve xfs fuzzing | expand

Commit Message

Darrick J. Wong Dec. 30, 2022, 10:19 p.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

After we've run xfs_scrub -n to perform a check of a mounted
filesystem's metadata, we should check the health reporting system to
make sure that the results got recorded.  Also wire this up to the xfs
fuzz testing helpers.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 common/fuzzy |   27 +++++++++++++++++++++++++++
 common/xfs   |   43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)
diff mbox series

Patch

diff --git a/common/fuzzy b/common/fuzzy
index cf085f8b28..d841d435eb 100644
--- a/common/fuzzy
+++ b/common/fuzzy
@@ -216,6 +216,15 @@  __scratch_xfs_fuzz_field_online() {
 	test $res -eq 0 && \
 		(>&2 echo "${fuzz_action}: online scrub didn't fail.")
 
+	# Does the health status report reflect the corruption?
+	if [ $res -ne 0 ]; then
+		__fuzz_notify "++ Detect fuzzed field ill-health report"
+		_check_xfs_health $SCRATCH_MNT 2>&1
+		res=$?
+		test $res -ne 1 && \
+			(>&2 echo "${fuzz_action}: online health check failed ($res).")
+	fi
+
 	# Try fixing the filesystem online
 	__fuzz_notify "++ Try to repair filesystem (online)"
 	_scratch_scrub 2>&1
@@ -308,6 +317,15 @@  __scratch_xfs_fuzz_field_norepair() {
 	test $res -eq 0 && \
 		(>&2 echo "${fuzz_action}: online scrub didn't fail.")
 
+	# Does the health status report reflect the corruption?
+	if [ $res -ne 0 ]; then
+		__fuzz_notify "++ Detect fuzzed field ill-health report"
+		_check_xfs_health $SCRATCH_MNT 2>&1
+		res=$?
+		test $res -ne 1 && \
+			(>&2 echo "${fuzz_action}: online health check failed ($res).")
+	fi
+
 	__scratch_xfs_fuzz_unmount
 
 	return 0
@@ -338,6 +356,15 @@  __scratch_xfs_fuzz_field_both() {
 		test $res -eq 0 && \
 			(>&2 echo "${fuzz_action}: online scrub didn't fail.")
 
+		# Does the health status report reflect the corruption?
+		if [ $res -ne 0 ]; then
+			__fuzz_notify "++ Detect fuzzed field ill-health report"
+			_check_xfs_health $SCRATCH_MNT 2>&1
+			res=$?
+			test $res -ne 1 && \
+				(>&2 echo "${fuzz_action}: online health check failed ($res).")
+		fi
+
 		# Try fixing the filesystem online
 		__fuzz_notify "++ Try to repair filesystem (online)"
 		_scratch_scrub 2>&1
diff --git a/common/xfs b/common/xfs
index 804047557b..371618dc7b 100644
--- a/common/xfs
+++ b/common/xfs
@@ -599,6 +599,37 @@  _require_xfs_db_command()
 		_notrun "xfs_db $command support is missing"
 }
 
+# Check the health of a mounted XFS filesystem.  Callers probably want to
+# ensure that xfs_scrub has been run first.  $1 is the mount point.  Prints
+# the health report; returns 1 if unhealthy metadata are found or 0 otherwise.
+_check_xfs_health() {
+	local mntpt="$1"
+	local ret=0
+	local t="$tmp.health_helper"
+	test -x "$XFS_SPACEMAN_PROG" || return 0
+
+	$XFS_SPACEMAN_PROG -c 'health -c -q' "$mntpt" > "$t.out" 2> "$t.err"
+	test $? -ne 0 && ret=1
+
+	# Don't return error if userspace or kernel don't support health
+	# reporting, and don't leave the temporary files behind either.
+	if grep -q -e 'command.*health.*not found' \
+			-e 'Inappropriate ioctl for device' "$t.err"; then
+		rm -f "$t.out" "$t.err"
+		return 0
+	fi
+
+	# Filter out the "please run scrub" message if nothing's been checked.
+	sed -e '/Health status has not been/d' -e '/Please run xfs_scrub/d' -i \
+			"$t.err"
+	grep -q unhealthy "$t.out" && ret=1
+	test -s "$t.err" && ret=1	# any leftover stderr output is a failure
+	cat "$t.out"
+	cat "$t.err" 1>&2
+	rm -f "$t.out" "$t.err"
+	return $ret
+}
+
 # Does the filesystem mounted from a particular device support scrub?
 _supports_xfs_scrub()
 {
@@ -750,6 +781,18 @@  _check_xfs_filesystem()
 			ok=0
 		fi
 		rm -f $tmp.scrub
+
+		# Does the health reporting notice anything?
+		_check_xfs_health $mntpt > $tmp.health 2>&1
+		res=$?
+		if [ $((res ^ ok)) -eq 0 ]; then
+			_log_err "_check_xfs_filesystem: filesystem on $device failed health check"
+			echo "*** xfs_spaceman -c 'health -c -q' output ***" >> $seqres.full
+			cat $tmp.health >> $seqres.full
+			echo "*** end xfs_spaceman output" >> $seqres.full
+			ok=0
+		fi
+		rm -f $tmp.health
 	fi
 
 	if [ "$type" = "xfs" ]; then