diff mbox series

[3/3] xfs: race fsmap with readonly remounts to detect crash or livelock

Message ID 167243837811.695156.1712702786946561753.stgit@magnolia (mailing list archive)
State New, archived
Headers show
Series [1/3] fuzzy: enhance scrub stress testing to use fsx | expand

Commit Message

Darrick J. Wong Dec. 30, 2022, 10:12 p.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

Add a new test that races the GETFSMAP ioctl with ro/rw remounting to
make sure we don't livelock on the empty transaction that fsmap uses to
avoid deadlocking on rmap btree cycles.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 common/fuzzy      |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 ltp/fsstress.c    |   18 +++++++++-
 tests/xfs/732     |   38 +++++++++++++++++++++
 tests/xfs/732.out |    2 +
 4 files changed, 153 insertions(+), 3 deletions(-)
 create mode 100755 tests/xfs/732
 create mode 100644 tests/xfs/732.out
diff mbox series

Patch

diff --git a/common/fuzzy b/common/fuzzy
index 58e299d34b..ee97aa4298 100644
--- a/common/fuzzy
+++ b/common/fuzzy
@@ -429,6 +429,7 @@  __stress_scrub_clean_scratch() {
 __stress_scrub_fsx_loop() {
 	local end="$1"
 	local runningfile="$2"
+	local remount_period="$3"
 	local focus=(-q -X)	# quiet, validate file contents
 
 	# As of November 2022, 2 million fsx ops should be enough to keep
@@ -440,6 +441,43 @@  __stress_scrub_fsx_loop() {
 	local args="$FSX_AVOID ${focus[@]} ${SCRATCH_MNT}/fsx.$seq"
 	echo "Running $here/ltp/fsx $args" >> $seqres.full
 
+	if [ -n "$remount_period" ]; then
+		local mode="rw"
+		local rw_arg=""
+		while __stress_scrub_running "$end" "$runningfile"; do
+			# Need to recheck running conditions if we cleared
+			# anything.
+			test "$mode" = "rw" && __stress_scrub_clean_scratch && continue
+
+			timeout -s TERM "$remount_period" $here/ltp/fsx \
+					$args $rw_arg >> $seqres.full
+			res=$?
+			echo "$mode fsx exits with $res at $(date)" >> $seqres.full
+			if [ "$res" -ne 0 ] && [ "$res" -ne 124 ]; then
+				# Stop if fsx returns error.  Mask off
+				# the magic code 124 because that is how the
+				# timeout(1) program communicates that we ran
+				# out of time.
+				break;
+			fi
+			if [ "$mode" = "rw" ]; then
+				mode="ro"
+				rw_arg="-t 0 -w 0 -FHzCIJBE0"
+			else
+				mode="rw"
+				rw_arg=""
+			fi
+
+			# Try remounting until we get the result we wanted
+			while ! _scratch_remount "$mode" &>/dev/null && \
+			      __stress_scrub_running "$end" "$runningfile"; do
+				sleep 0.2
+			done
+		done
+		rm -f "$runningfile"
+		return 0
+	fi
+
 	while __stress_scrub_running "$end" "$runningfile"; do
 		# Need to recheck running conditions if we cleared anything
 		__stress_scrub_clean_scratch && continue
@@ -453,12 +491,50 @@  __stress_scrub_fsx_loop() {
 __stress_scrub_fsstress_loop() {
 	local end="$1"
 	local runningfile="$2"
+	local remount_period="$3"
 
 	# As of March 2022, 2 million fsstress ops should be enough to keep
 	# any filesystem busy for a couple of hours.
 	local args=$(_scale_fsstress_args -p 4 -d $SCRATCH_MNT -n 2000000 $FSSTRESS_AVOID)
 	echo "Running $FSSTRESS_PROG $args" >> $seqres.full
 
+	if [ -n "$remount_period" ]; then
+		local mode="rw"
+		local rw_arg=""
+		while __stress_scrub_running "$end" "$runningfile"; do
+			# Need to recheck running conditions if we cleared
+			# anything.
+			test "$mode" = "rw" && __stress_scrub_clean_scratch && continue
+
+			timeout -s TERM "$remount_period" $FSSTRESS_PROG \
+					$args $rw_arg >> $seqres.full
+			res=$?
+			echo "$mode fsstress exits with $res at $(date)" >> $seqres.full
+			if [ "$res" -ne 0 ] && [ "$res" -ne 124 ]; then
+				# Stop if fsstress returns error.  Mask off
+				# the magic code 124 because that is how the
+				# timeout(1) program communicates that we ran
+				# out of time.
+				break;
+			fi
+			if [ "$mode" = "rw" ]; then
+				mode="ro"
+				rw_arg="-R"
+			else
+				mode="rw"
+				rw_arg=""
+			fi
+
+			# Try remounting until we get the result we wanted
+			while ! _scratch_remount "$mode" &>/dev/null && \
+			      __stress_scrub_running "$end" "$runningfile"; do
+				sleep 0.2
+			done
+		done
+		rm -f "$runningfile"
+		return 0
+	fi
+
 	while __stress_scrub_running "$end" "$runningfile"; do
 		# Need to recheck running conditions if we cleared anything
 		__stress_scrub_clean_scratch && continue
@@ -526,6 +602,13 @@  _scratch_xfs_stress_scrub_cleanup() {
 	echo "Waiting for children to exit at $(date)" >> $seqres.full
 	wait
 
+	# Ensure the scratch fs is also writable before we exit.
+	if [ -n "$__SCRUB_STRESS_REMOUNT_LOOP" ]; then
+		echo "Remounting rw at $(date)" >> $seqres.full
+		_scratch_remount rw >> $seqres.full 2>&1
+		__SCRUB_STRESS_REMOUNT_LOOP=""
+	fi
+
 	echo "Cleanup finished at $(date)" >> $seqres.full
 }
 
@@ -561,6 +644,9 @@  __stress_scrub_check_commands() {
 #	in a separate loop.  If zero -i options are specified, do not run.
 #	Callers must check each of these commands (via _require_xfs_io_command)
 #	before calling here.
+# -r	Run the exerciser for this long (a timeout(1) duration), then toggle
+#	the fs between ro and rw.  The default is to run the exerciser
+#	continuously with no remount, unless XFS_SCRUB_STRESS_REMOUNT_PERIOD is set.
 # -s	Pass this command to xfs_io to test scrub.  If zero -s options are
 #	specified, xfs_io will not be run.
 # -t	Run online scrub against this file; $SCRATCH_MNT is the default.
@@ -577,16 +663,19 @@  _scratch_xfs_stress_scrub() {
 	local scrub_delay="${XFS_SCRUB_STRESS_DELAY:--1}"
 	local exerciser="fsstress"
 	local io_args=()
+	local remount_period="${XFS_SCRUB_STRESS_REMOUNT_PERIOD}"
 
 	__SCRUB_STRESS_FREEZE_PID=""
+	__SCRUB_STRESS_REMOUNT_LOOP=""
 	rm -f "$runningfile"
 	touch "$runningfile"
 
 	OPTIND=1
-	while getopts "fi:s:t:w:X:" c; do
+	while getopts "fi:r:s:t:w:X:" c; do
 		case "$c" in
 			f) freeze=yes;;
 			i) io_args+=("$OPTARG");;
+			r) remount_period="$OPTARG";;
 			s) one_scrub_args+=("$OPTARG");;
 			t) scrub_tgt="$OPTARG";;
 			w) scrub_delay="$OPTARG";;
@@ -611,7 +700,12 @@  _scratch_xfs_stress_scrub() {
 	echo "Loop started at $(date --date="@${start}")," \
 		   "ending at $(date --date="@${end}")" >> $seqres.full
 
-	"__stress_scrub_${exerciser}_loop" "$end" "$runningfile" &
+	if [ -n "$remount_period" ]; then
+		__SCRUB_STRESS_REMOUNT_LOOP="1"
+	fi
+
+	"__stress_scrub_${exerciser}_loop" "$end" "$runningfile" \
+			"$remount_period" &
 
 	if [ -n "$freeze" ]; then
 		__stress_scrub_freeze_loop "$end" "$runningfile" &
diff --git a/ltp/fsstress.c b/ltp/fsstress.c
index b395bc4da2..10608fb554 100644
--- a/ltp/fsstress.c
+++ b/ltp/fsstress.c
@@ -426,6 +426,7 @@  int	symlink_path(const char *, pathname_t *);
 int	truncate64_path(pathname_t *, off64_t);
 int	unlink_path(pathname_t *);
 void	usage(void);
+void	read_freq(void);
 void	write_freq(void);
 void	zero_freq(void);
 void	non_btrfs_freq(const char *);
@@ -472,7 +473,7 @@  int main(int argc, char **argv)
 	xfs_error_injection_t	        err_inj;
 	struct sigaction action;
 	int		loops = 1;
-	const char	*allopts = "cd:e:f:i:l:m:M:n:o:p:rs:S:vVwx:X:zH";
+	const char	*allopts = "cd:e:f:i:l:m:M:n:o:p:rRs:S:vVwx:X:zH";
 
 	errrange = errtag = 0;
 	umask(0);
@@ -538,6 +539,9 @@  int main(int argc, char **argv)
 		case 'r':
 			namerand = 1;
 			break;
+		case 'R':
+			read_freq();
+			break;
 		case 's':
 			seed = strtoul(optarg, NULL, 0);
 			break;
@@ -1917,6 +1921,7 @@  usage(void)
 	printf("   -o logfile       specifies logfile name\n");
 	printf("   -p nproc         specifies the no. of processes (default 1)\n");
 	printf("   -r               specifies random name padding\n");
+	printf("   -R               zeros frequencies of write operations\n");
 	printf("   -s seed          specifies the seed for the random generator (default random)\n");
 	printf("   -v               specifies verbose mode\n");
 	printf("   -w               zeros frequencies of non-write operations\n");
@@ -1928,6 +1933,17 @@  usage(void)
 	printf("   -H               prints usage and exits\n");
 }
 
+void
+read_freq(void)
+{
+	opdesc_t	*p;
+
+	for (p = ops; p < ops_end; p++) {
+		if (p->iswrite)
+			p->freq = 0;
+	}
+}
+
 void
 write_freq(void)
 {
diff --git a/tests/xfs/732 b/tests/xfs/732
new file mode 100755
index 0000000000..ed6fb3c977
--- /dev/null
+++ b/tests/xfs/732
@@ -0,0 +1,38 @@ 
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) 2022 Oracle.  All Rights Reserved.
+#
+# FS QA Test No. 732
+#
+# Race GETFSMAP and ro remount for a while to see if we crash or livelock.
+#
+. ./common/preamble
+_begin_fstest auto quick fsmap remount
+
+# Override the default cleanup function.
+_cleanup()
+{
+	cd /
+	_scratch_xfs_stress_scrub_cleanup
+	rm -rf $tmp.*
+}
+
+# Import common functions.
+. ./common/filter
+. ./common/fuzzy
+. ./common/xfs
+
+# real QA test starts here
+_supported_fs xfs
+_require_xfs_scratch_rmapbt
+_require_xfs_io_command "fsmap"
+_require_xfs_stress_scrub
+
+_scratch_mkfs > "$seqres.full" 2>&1
+_scratch_mount
+_scratch_xfs_stress_scrub -r 5 -i 'fsmap -v'
+
+# success, all done
+echo "Silence is golden"
+status=0
+exit
diff --git a/tests/xfs/732.out b/tests/xfs/732.out
new file mode 100644
index 0000000000..451f82ce2d
--- /dev/null
+++ b/tests/xfs/732.out
@@ -0,0 +1,2 @@ 
+QA output created by 732
+Silence is golden