diff mbox series

[2/2] check: capture dmesg of mount failures if test fails

Message ID 173568782758.2712126.12517347004514444060.stgit@frogsfrogsfrogs (mailing list archive)
State New
Headers show
Series [1/2] treewide: convert all $MOUNT_PROG to _mount | expand

Commit Message

Darrick J. Wong Dec. 31, 2024, 11:56 p.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

Capture the kernel output after a mount failure occurs.  If the test
itself fails, then keep the logging output for further diagnosis.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
 check                  |   22 +++++++++++++++++++++-
 common/rc              |   26 +++++++++++++++++++++++++-
 common/report          |    8 ++++++++
 tests/selftest/008     |   20 ++++++++++++++++++++
 tests/selftest/008.out |    1 +
 5 files changed, 75 insertions(+), 2 deletions(-)
 create mode 100755 tests/selftest/008
 create mode 100644 tests/selftest/008.out

Comments

Nirjhar Roy Jan. 6, 2025, 11:18 a.m. UTC | #1
On Tue, 2024-12-31 at 15:56 -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Capture the kernel output after a mount failure occurs.  If the test
> itself fails, then keep the logging output for further diagnosis.
> 
> Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
> ---
>  check                  |   22 +++++++++++++++++++++-
>  common/rc              |   26 +++++++++++++++++++++++++-
>  common/report          |    8 ++++++++
>  tests/selftest/008     |   20 ++++++++++++++++++++
>  tests/selftest/008.out |    1 +
>  5 files changed, 75 insertions(+), 2 deletions(-)
>  create mode 100755 tests/selftest/008
>  create mode 100644 tests/selftest/008.out
> 
> 
> diff --git a/check b/check
> index 9222cd7e4f8197..a46ea1a54d78bb 100755
> --- a/check
> +++ b/check
> @@ -614,7 +614,7 @@ _stash_fail_loop_files() {
>  	local seq_prefix="${REPORT_DIR}/${1}"
>  	local cp_suffix="$2"
>  
> -	for i in ".full" ".dmesg" ".out.bad" ".notrun" ".core"
> ".hints"; do
> +	for i in ".full" ".dmesg" ".out.bad" ".notrun" ".core" ".hints"
> ".mountfail"; do
>  		rm -f "${seq_prefix}${i}${cp_suffix}"
>  		if [ -f "${seq_prefix}${i}" ]; then
>  			cp "${seq_prefix}${i}"
> "${seq_prefix}${i}${cp_suffix}"
> @@ -994,6 +994,7 @@ function run_section()
>  				      echo -n "	$seqnum -- "
>  			cat $seqres.notrun
>  			tc_status="notrun"
> +			rm -f "$seqres.mountfail?"
>  			_stash_test_status "$seqnum" "$tc_status"
>  
>  			# Unmount the scratch fs so that we can wipe
> the scratch
> @@ -1053,6 +1054,7 @@ function run_section()
>  		if [ ! -f $seq.out ]; then
>  			_dump_err "no qualified output"
>  			tc_status="fail"
> +			rm -f "$seqres.mountfail?"
>  			_stash_test_status "$seqnum" "$tc_status"
>  			continue;
>  		fi
> @@ -1089,6 +1091,24 @@ function run_section()
>  				rm -f $seqres.hints
>  			fi
>  		fi
> +
> +		if [ -f "$seqres.mountfail?" ]; then
> +			if [ "$tc_status" = "fail" ]; then
> +				# Let the user know if there were mount
> +				# failures on a test that failed
> because that
> +				# could be interesting.
> +				mv "$seqres.mountfail?"
> "$seqres.mountfail"
> +				_dump_err "check: possible mount
> failures (see $seqres.mountfail)"
> +				test -f $seqres.mountfail && \
> +					maybe_compress_logfile
> $seqres.mountfail $MAX_MOUNTFAIL_SIZE
> +			else
> +				# Don't retain mount failure logs for
> tests
> +				# that pass or were skipped because
> some tests
> +				# intentionally drive mount failures.
> +				rm -f "$seqres.mountfail?"
> +			fi
> +		fi
> +
>  		_stash_test_status "$seqnum" "$tc_status"
>  	done
>  
> diff --git a/common/rc b/common/rc
> index d7dfb55bbbd7e1..0ede68eb912440 100644
> --- a/common/rc
> +++ b/common/rc
> @@ -204,9 +204,33 @@ _get_hugepagesize()
>  	awk '/Hugepagesize/ {print $2 * 1024}' /proc/meminfo
>  }
>  
> +# Does dmesg have a --since flag?
> +_dmesg_detect_since()
> +{
> +	if [ -z "$DMESG_HAS_SINCE" ]; then
> +		test "$DMESG_HAS_SINCE" = "yes"
> +		return
> +	elif dmesg --help | grep -q -- --since; then
> +		DMESG_HAS_SINCE=yes
> +	else
> +		DMESG_HAS_SINCE=no
> +	fi
> +}
> +
>  _mount()
>  {
> -    $MOUNT_PROG $*
> +	$MOUNT_PROG $*
> +	ret=$?
> +	if [ "$ret" -ne 0 ]; then
> +		echo "\"$MOUNT_PROG $*\" failed at $(date)" >>
> "$seqres.mountfail?"
> +		if _dmesg_detect_since; then
> +			dmesg --since '30s ago' >> "$seqres.mountfail?"
> +		else
> +			dmesg | tail -n 100 >> "$seqres.mountfail?"
Is it possible to grep for a mount failure message in dmesg and then
capture the last n lines? Do you think that will be more accurate?

Also, do you think it is useful to make this 100 configurable instead
of hardcoding? 
> +		fi
> +	fi
> +
> +	return $ret
>  }
>  
>  # Call _mount to do mount operation but also save mountpoint to
> diff --git a/common/report b/common/report
> index 0e91e481f9725a..b57697f76dafb2 100644
> --- a/common/report
> +++ b/common/report
> @@ -199,6 +199,7 @@ _xunit_make_testcase_report()
>  		local out_src="${SRC_DIR}/${test_name}.out"
>  		local full_file="${REPORT_DIR}/${test_name}.full"
>  		local dmesg_file="${REPORT_DIR}/${test_name}.dmesg"
> +		local
> mountfail_file="${REPORT_DIR}/${test_name}.mountfail"
>  		local outbad_file="${REPORT_DIR}/${test_name}.out.bad"
>  		if [ -z "$_err_msg" ]; then
>  			_err_msg="Test $test_name failed, reason
> unknown"
> @@ -225,6 +226,13 @@ _xunit_make_testcase_report()
>  			printf ']]>\n'	>>$report
>  			echo -e "\t\t</system-err>" >> $report
>  		fi
> +		if [ -z "$quiet" -a -f "$mountfail_file" ]; then
> +			echo -e "\t\t<mount-failure>" >> $report
> +			printf	'<![CDATA[\n' >>$report
> +			cat "$mountfail_file" | tr -dc
> '[:print:][:space:]' | encode_cdata >>$report
> +			printf ']]>\n'	>>$report
> +			echo -e "\t\t</mount-failure>" >> $report
> +		fi
>  		;;
>  	*)
>  		echo -e "\t\t<failure message=\"Unknown
> test_status=$test_status\" type=\"TestFail\"/>" >> $report
> diff --git a/tests/selftest/008 b/tests/selftest/008
> new file mode 100755
> index 00000000000000..db80ffe6f77339
> --- /dev/null
> +++ b/tests/selftest/008
> @@ -0,0 +1,20 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (c) 2024-2025 Oracle.  All Rights Reserved.
> +#
> +# FS QA Test 008
> +#
> +# Test mount failure capture.
> +#
> +. ./common/preamble
> +_begin_fstest selftest
> +
> +_require_command "$WIPEFS_PROG" wipefs
> +_require_scratch
> +
> +$WIPEFS_PROG -a $SCRATCH_DEV
> +_scratch_mount &>> $seqres.full
Minor: Do you think adding some filtered messages from the captured
dmesg logs in the output will be helpful?  
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/selftest/008.out b/tests/selftest/008.out
> new file mode 100644
> index 00000000000000..aaff95f3f48372
> --- /dev/null
> +++ b/tests/selftest/008.out
> @@ -0,0 +1 @@
> +QA output created by 008
>
Darrick J. Wong Jan. 6, 2025, 11:52 p.m. UTC | #2
On Mon, Jan 06, 2025 at 04:48:34PM +0530, Nirjhar Roy wrote:
> On Tue, 2024-12-31 at 15:56 -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Capture the kernel output after a mount failure occurs.  If the test
> > itself fails, then keep the logging output for further diagnosis.
> > 
> > Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
> > ---
> >  check                  |   22 +++++++++++++++++++++-
> >  common/rc              |   26 +++++++++++++++++++++++++-
> >  common/report          |    8 ++++++++
> >  tests/selftest/008     |   20 ++++++++++++++++++++
> >  tests/selftest/008.out |    1 +
> >  5 files changed, 75 insertions(+), 2 deletions(-)
> >  create mode 100755 tests/selftest/008
> >  create mode 100644 tests/selftest/008.out
> > 
> > 
> > diff --git a/check b/check
> > index 9222cd7e4f8197..a46ea1a54d78bb 100755
> > --- a/check
> > +++ b/check
> > @@ -614,7 +614,7 @@ _stash_fail_loop_files() {
> >  	local seq_prefix="${REPORT_DIR}/${1}"
> >  	local cp_suffix="$2"
> >  
> > -	for i in ".full" ".dmesg" ".out.bad" ".notrun" ".core"
> > ".hints"; do
> > +	for i in ".full" ".dmesg" ".out.bad" ".notrun" ".core" ".hints"
> > ".mountfail"; do
> >  		rm -f "${seq_prefix}${i}${cp_suffix}"
> >  		if [ -f "${seq_prefix}${i}" ]; then
> >  			cp "${seq_prefix}${i}"
> > "${seq_prefix}${i}${cp_suffix}"
> > @@ -994,6 +994,7 @@ function run_section()
> >  				      echo -n "	$seqnum -- "
> >  			cat $seqres.notrun
> >  			tc_status="notrun"
> > +			rm -f "$seqres.mountfail?"
> >  			_stash_test_status "$seqnum" "$tc_status"
> >  
> >  			# Unmount the scratch fs so that we can wipe
> > the scratch
> > @@ -1053,6 +1054,7 @@ function run_section()
> >  		if [ ! -f $seq.out ]; then
> >  			_dump_err "no qualified output"
> >  			tc_status="fail"
> > +			rm -f "$seqres.mountfail?"
> >  			_stash_test_status "$seqnum" "$tc_status"
> >  			continue;
> >  		fi
> > @@ -1089,6 +1091,24 @@ function run_section()
> >  				rm -f $seqres.hints
> >  			fi
> >  		fi
> > +
> > +		if [ -f "$seqres.mountfail?" ]; then
> > +			if [ "$tc_status" = "fail" ]; then
> > +				# Let the user know if there were mount
> > +				# failures on a test that failed
> > because that
> > +				# could be interesting.
> > +				mv "$seqres.mountfail?"
> > "$seqres.mountfail"
> > +				_dump_err "check: possible mount
> > failures (see $seqres.mountfail)"
> > +				test -f $seqres.mountfail && \
> > +					maybe_compress_logfile
> > $seqres.mountfail $MAX_MOUNTFAIL_SIZE
> > +			else
> > +				# Don't retain mount failure logs for
> > tests
> > +				# that pass or were skipped because
> > some tests
> > +				# intentionally drive mount failures.
> > +				rm -f "$seqres.mountfail?"
> > +			fi
> > +		fi
> > +
> >  		_stash_test_status "$seqnum" "$tc_status"
> >  	done
> >  
> > diff --git a/common/rc b/common/rc
> > index d7dfb55bbbd7e1..0ede68eb912440 100644
> > --- a/common/rc
> > +++ b/common/rc
> > @@ -204,9 +204,33 @@ _get_hugepagesize()
> >  	awk '/Hugepagesize/ {print $2 * 1024}' /proc/meminfo
> >  }
> >  
> > +# Does dmesg have a --since flag?
> > +_dmesg_detect_since()
> > +{
> > +	if [ -z "$DMESG_HAS_SINCE" ]; then
> > +		test "$DMESG_HAS_SINCE" = "yes"
> > +		return
> > +	elif dmesg --help | grep -q -- --since; then
> > +		DMESG_HAS_SINCE=yes
> > +	else
> > +		DMESG_HAS_SINCE=no
> > +	fi
> > +}
> > +
> >  _mount()
> >  {
> > -    $MOUNT_PROG $*
> > +	$MOUNT_PROG $*
> > +	ret=$?
> > +	if [ "$ret" -ne 0 ]; then
> > +		echo "\"$MOUNT_PROG $*\" failed at $(date)" >>
> > "$seqres.mountfail?"
> > +		if _dmesg_detect_since; then
> > +			dmesg --since '30s ago' >> "$seqres.mountfail?"
> > +		else
> > +			dmesg | tail -n 100 >> "$seqres.mountfail?"
> Is it possible to grep for a mount failure message in dmesg and then
> capture the last n lines? Do you think that will be more accurate?

Alas no, because there's no standard mount failure log message for us to
latch onto.

> Also, do you think it is useful to make this 100 configurable instead
> of hardcoding? 

I suppose, but why do you need more than 100?

> > +		fi
> > +	fi
> > +
> > +	return $ret
> >  }
> >  
> >  # Call _mount to do mount operation but also save mountpoint to
> > diff --git a/common/report b/common/report
> > index 0e91e481f9725a..b57697f76dafb2 100644
> > --- a/common/report
> > +++ b/common/report
> > @@ -199,6 +199,7 @@ _xunit_make_testcase_report()
> >  		local out_src="${SRC_DIR}/${test_name}.out"
> >  		local full_file="${REPORT_DIR}/${test_name}.full"
> >  		local dmesg_file="${REPORT_DIR}/${test_name}.dmesg"
> > +		local
> > mountfail_file="${REPORT_DIR}/${test_name}.mountfail"
> >  		local outbad_file="${REPORT_DIR}/${test_name}.out.bad"
> >  		if [ -z "$_err_msg" ]; then
> >  			_err_msg="Test $test_name failed, reason
> > unknown"
> > @@ -225,6 +226,13 @@ _xunit_make_testcase_report()
> >  			printf ']]>\n'	>>$report
> >  			echo -e "\t\t</system-err>" >> $report
> >  		fi
> > +		if [ -z "$quiet" -a -f "$mountfail_file" ]; then
> > +			echo -e "\t\t<mount-failure>" >> $report
> > +			printf	'<![CDATA[\n' >>$report
> > +			cat "$mountfail_file" | tr -dc
> > '[:print:][:space:]' | encode_cdata >>$report
> > +			printf ']]>\n'	>>$report
> > +			echo -e "\t\t</mount-failure>" >> $report
> > +		fi
> >  		;;
> >  	*)
> >  		echo -e "\t\t<failure message=\"Unknown
> > test_status=$test_status\" type=\"TestFail\"/>" >> $report
> > diff --git a/tests/selftest/008 b/tests/selftest/008
> > new file mode 100755
> > index 00000000000000..db80ffe6f77339
> > --- /dev/null
> > +++ b/tests/selftest/008
> > @@ -0,0 +1,20 @@
> > +#! /bin/bash
> > +# SPDX-License-Identifier: GPL-2.0
> > +# Copyright (c) 2024-2025 Oracle.  All Rights Reserved.
> > +#
> > +# FS QA Test 008
> > +#
> > +# Test mount failure capture.
> > +#
> > +. ./common/preamble
> > +_begin_fstest selftest
> > +
> > +_require_command "$WIPEFS_PROG" wipefs
> > +_require_scratch
> > +
> > +$WIPEFS_PROG -a $SCRATCH_DEV
> > +_scratch_mount &>> $seqres.full
> Minor: Do you think adding some filtered messages from the captured
> dmesg logs in the output will be helpful?  

No, this test exists to make sure that the dmesg log is captured in
$RESULT_DIR.  We don't care about the mount(8) output.

--D

> > +
> > +# success, all done
> > +status=0
> > +exit
> > diff --git a/tests/selftest/008.out b/tests/selftest/008.out
> > new file mode 100644
> > index 00000000000000..aaff95f3f48372
> > --- /dev/null
> > +++ b/tests/selftest/008.out
> > @@ -0,0 +1 @@
> > +QA output created by 008
> > 
> 
>
diff mbox series

Patch

diff --git a/check b/check
index 9222cd7e4f8197..a46ea1a54d78bb 100755
--- a/check
+++ b/check
@@ -614,7 +614,7 @@  _stash_fail_loop_files() {
 	local seq_prefix="${REPORT_DIR}/${1}"
 	local cp_suffix="$2"
 
-	for i in ".full" ".dmesg" ".out.bad" ".notrun" ".core" ".hints"; do
+	for i in ".full" ".dmesg" ".out.bad" ".notrun" ".core" ".hints" ".mountfail"; do
 		rm -f "${seq_prefix}${i}${cp_suffix}"
 		if [ -f "${seq_prefix}${i}" ]; then
 			cp "${seq_prefix}${i}" "${seq_prefix}${i}${cp_suffix}"
@@ -994,6 +994,7 @@  function run_section()
 				      echo -n "	$seqnum -- "
 			cat $seqres.notrun
 			tc_status="notrun"
+			rm -f "$seqres.mountfail?"
 			_stash_test_status "$seqnum" "$tc_status"
 
 			# Unmount the scratch fs so that we can wipe the scratch
@@ -1053,6 +1054,7 @@  function run_section()
 		if [ ! -f $seq.out ]; then
 			_dump_err "no qualified output"
 			tc_status="fail"
+			rm -f "$seqres.mountfail?"
 			_stash_test_status "$seqnum" "$tc_status"
 			continue;
 		fi
@@ -1089,6 +1091,24 @@  function run_section()
 				rm -f $seqres.hints
 			fi
 		fi
+
+		if [ -f "$seqres.mountfail?" ]; then
+			if [ "$tc_status" = "fail" ]; then
+				# Let the user know if there were mount
+				# failures on a test that failed because that
+				# could be interesting.
+				mv "$seqres.mountfail?" "$seqres.mountfail"
+				_dump_err "check: possible mount failures (see $seqres.mountfail)"
+				test -f $seqres.mountfail && \
+					maybe_compress_logfile $seqres.mountfail $MAX_MOUNTFAIL_SIZE
+			else
+				# Don't retain mount failure logs for tests
+				# that pass or were skipped because some tests
+				# intentionally drive mount failures.
+				rm -f "$seqres.mountfail?"
+			fi
+		fi
+
 		_stash_test_status "$seqnum" "$tc_status"
 	done
 
diff --git a/common/rc b/common/rc
index d7dfb55bbbd7e1..0ede68eb912440 100644
--- a/common/rc
+++ b/common/rc
@@ -204,9 +204,33 @@  _get_hugepagesize()
 	awk '/Hugepagesize/ {print $2 * 1024}' /proc/meminfo
 }
 
+# Does dmesg have a --since flag?
+_dmesg_detect_since()
+{
+	if [ -z "$DMESG_HAS_SINCE" ]; then
+		test "$DMESG_HAS_SINCE" = "yes"
+		return
+	elif dmesg --help | grep -q -- --since; then
+		DMESG_HAS_SINCE=yes
+	else
+		DMESG_HAS_SINCE=no
+	fi
+}
+
 _mount()
 {
-    $MOUNT_PROG $*
+	$MOUNT_PROG $*
+	ret=$?
+	if [ "$ret" -ne 0 ]; then
+		echo "\"$MOUNT_PROG $*\" failed at $(date)" >> "$seqres.mountfail?"
+		if _dmesg_detect_since; then
+			dmesg --since '30s ago' >> "$seqres.mountfail?"
+		else
+			dmesg | tail -n 100 >> "$seqres.mountfail?"
+		fi
+	fi
+
+	return $ret
 }
 
 # Call _mount to do mount operation but also save mountpoint to
diff --git a/common/report b/common/report
index 0e91e481f9725a..b57697f76dafb2 100644
--- a/common/report
+++ b/common/report
@@ -199,6 +199,7 @@  _xunit_make_testcase_report()
 		local out_src="${SRC_DIR}/${test_name}.out"
 		local full_file="${REPORT_DIR}/${test_name}.full"
 		local dmesg_file="${REPORT_DIR}/${test_name}.dmesg"
+		local mountfail_file="${REPORT_DIR}/${test_name}.mountfail"
 		local outbad_file="${REPORT_DIR}/${test_name}.out.bad"
 		if [ -z "$_err_msg" ]; then
 			_err_msg="Test $test_name failed, reason unknown"
@@ -225,6 +226,13 @@  _xunit_make_testcase_report()
 			printf ']]>\n'	>>$report
 			echo -e "\t\t</system-err>" >> $report
 		fi
+		if [ -z "$quiet" -a -f "$mountfail_file" ]; then
+			echo -e "\t\t<mount-failure>" >> $report
+			printf	'<![CDATA[\n' >>$report
+			cat "$mountfail_file" | tr -dc '[:print:][:space:]' | encode_cdata >>$report
+			printf ']]>\n'	>>$report
+			echo -e "\t\t</mount-failure>" >> $report
+		fi
 		;;
 	*)
 		echo -e "\t\t<failure message=\"Unknown test_status=$test_status\" type=\"TestFail\"/>" >> $report
diff --git a/tests/selftest/008 b/tests/selftest/008
new file mode 100755
index 00000000000000..db80ffe6f77339
--- /dev/null
+++ b/tests/selftest/008
@@ -0,0 +1,20 @@ 
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2024-2025 Oracle.  All Rights Reserved.
+#
+# FS QA Test 008
+#
+# Test mount failure capture.
+#
+. ./common/preamble
+_begin_fstest selftest
+
+_require_command "$WIPEFS_PROG" wipefs
+_require_scratch
+
+$WIPEFS_PROG -a $SCRATCH_DEV
+_scratch_mount &>> $seqres.full
+
+# success, all done
+status=0
+exit
diff --git a/tests/selftest/008.out b/tests/selftest/008.out
new file mode 100644
index 00000000000000..aaff95f3f48372
--- /dev/null
+++ b/tests/selftest/008.out
@@ -0,0 +1 @@ 
+QA output created by 008