diff mbox series

[2/2] check: capture dmesg of mount failures if test fails

Message ID 173568782758.2712126.12517347004514444060.stgit@frogsfrogsfrogs (mailing list archive)
State New, archived
Headers show
Series [1/2] treewide: convert all $MOUNT_PROG to _mount | expand

Commit Message

Darrick J. Wong Dec. 31, 2024, 11:56 p.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

Capture the kernel output after a mount failure occurs.  If the test
itself fails, then keep the logging output for further diagnosis.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
 check                  |   22 +++++++++++++++++++++-
 common/rc              |   26 +++++++++++++++++++++++++-
 common/report          |    8 ++++++++
 tests/selftest/008     |   20 ++++++++++++++++++++
 tests/selftest/008.out |    1 +
 5 files changed, 75 insertions(+), 2 deletions(-)
 create mode 100755 tests/selftest/008
 create mode 100644 tests/selftest/008.out

Comments

Nirjhar Roy Jan. 6, 2025, 11:18 a.m. UTC | #1
On Tue, 2024-12-31 at 15:56 -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Capture the kernel output after a mount failure occurs.  If the test
> itself fails, then keep the logging output for further diagnosis.
> 
> Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
> ---
>  check                  |   22 +++++++++++++++++++++-
>  common/rc              |   26 +++++++++++++++++++++++++-
>  common/report          |    8 ++++++++
>  tests/selftest/008     |   20 ++++++++++++++++++++
>  tests/selftest/008.out |    1 +
>  5 files changed, 75 insertions(+), 2 deletions(-)
>  create mode 100755 tests/selftest/008
>  create mode 100644 tests/selftest/008.out
> 
> 
> diff --git a/check b/check
> index 9222cd7e4f8197..a46ea1a54d78bb 100755
> --- a/check
> +++ b/check
> @@ -614,7 +614,7 @@ _stash_fail_loop_files() {
>  	local seq_prefix="${REPORT_DIR}/${1}"
>  	local cp_suffix="$2"
>  
> -	for i in ".full" ".dmesg" ".out.bad" ".notrun" ".core"
> ".hints"; do
> +	for i in ".full" ".dmesg" ".out.bad" ".notrun" ".core" ".hints"
> ".mountfail"; do
>  		rm -f "${seq_prefix}${i}${cp_suffix}"
>  		if [ -f "${seq_prefix}${i}" ]; then
>  			cp "${seq_prefix}${i}"
> "${seq_prefix}${i}${cp_suffix}"
> @@ -994,6 +994,7 @@ function run_section()
>  				      echo -n "	$seqnum -- "
>  			cat $seqres.notrun
>  			tc_status="notrun"
> +			rm -f "$seqres.mountfail?"
>  			_stash_test_status "$seqnum" "$tc_status"
>  
>  			# Unmount the scratch fs so that we can wipe
> the scratch
> @@ -1053,6 +1054,7 @@ function run_section()
>  		if [ ! -f $seq.out ]; then
>  			_dump_err "no qualified output"
>  			tc_status="fail"
> +			rm -f "$seqres.mountfail?"
>  			_stash_test_status "$seqnum" "$tc_status"
>  			continue;
>  		fi
> @@ -1089,6 +1091,24 @@ function run_section()
>  				rm -f $seqres.hints
>  			fi
>  		fi
> +
> +		if [ -f "$seqres.mountfail?" ]; then
> +			if [ "$tc_status" = "fail" ]; then
> +				# Let the user know if there were mount
> +				# failures on a test that failed
> because that
> +				# could be interesting.
> +				mv "$seqres.mountfail?"
> "$seqres.mountfail"
> +				_dump_err "check: possible mount
> failures (see $seqres.mountfail)"
> +				test -f $seqres.mountfail && \
> +					maybe_compress_logfile
> $seqres.mountfail $MAX_MOUNTFAIL_SIZE
> +			else
> +				# Don't retain mount failure logs for
> tests
> +				# that pass or were skipped because
> some tests
> +				# intentionally drive mount failures.
> +				rm -f "$seqres.mountfail?"
> +			fi
> +		fi
> +
>  		_stash_test_status "$seqnum" "$tc_status"
>  	done
>  
> diff --git a/common/rc b/common/rc
> index d7dfb55bbbd7e1..0ede68eb912440 100644
> --- a/common/rc
> +++ b/common/rc
> @@ -204,9 +204,33 @@ _get_hugepagesize()
>  	awk '/Hugepagesize/ {print $2 * 1024}' /proc/meminfo
>  }
>  
> +# Does dmesg have a --since flag?
> +_dmesg_detect_since()
> +{
> +	if [ -z "$DMESG_HAS_SINCE" ]; then
> +		test "$DMESG_HAS_SINCE" = "yes"
> +		return
> +	elif dmesg --help | grep -q -- --since; then
> +		DMESG_HAS_SINCE=yes
> +	else
> +		DMESG_HAS_SINCE=no
> +	fi
> +}
> +
>  _mount()
>  {
> -    $MOUNT_PROG $*
> +	$MOUNT_PROG $*
> +	ret=$?
> +	if [ "$ret" -ne 0 ]; then
> +		echo "\"$MOUNT_PROG $*\" failed at $(date)" >>
> "$seqres.mountfail?"
> +		if _dmesg_detect_since; then
> +			dmesg --since '30s ago' >> "$seqres.mountfail?"
> +		else
> +			dmesg | tail -n 100 >> "$seqres.mountfail?"
Is it possible to grep for a mount failure message in dmesg and then
capture the last n lines? Do you think that will be more accurate?

Also, do you think it is useful to make this 100 configurable instead
of hardcoding? 
> +		fi
> +	fi
> +
> +	return $ret
>  }
>  
>  # Call _mount to do mount operation but also save mountpoint to
> diff --git a/common/report b/common/report
> index 0e91e481f9725a..b57697f76dafb2 100644
> --- a/common/report
> +++ b/common/report
> @@ -199,6 +199,7 @@ _xunit_make_testcase_report()
>  		local out_src="${SRC_DIR}/${test_name}.out"
>  		local full_file="${REPORT_DIR}/${test_name}.full"
>  		local dmesg_file="${REPORT_DIR}/${test_name}.dmesg"
> +		local
> mountfail_file="${REPORT_DIR}/${test_name}.mountfail"
>  		local outbad_file="${REPORT_DIR}/${test_name}.out.bad"
>  		if [ -z "$_err_msg" ]; then
>  			_err_msg="Test $test_name failed, reason
> unknown"
> @@ -225,6 +226,13 @@ _xunit_make_testcase_report()
>  			printf ']]>\n'	>>$report
>  			echo -e "\t\t</system-err>" >> $report
>  		fi
> +		if [ -z "$quiet" -a -f "$mountfail_file" ]; then
> +			echo -e "\t\t<mount-failure>" >> $report
> +			printf	'<![CDATA[\n' >>$report
> +			cat "$mountfail_file" | tr -dc
> '[:print:][:space:]' | encode_cdata >>$report
> +			printf ']]>\n'	>>$report
> +			echo -e "\t\t</mount-failure>" >> $report
> +		fi
>  		;;
>  	*)
>  		echo -e "\t\t<failure message=\"Unknown
> test_status=$test_status\" type=\"TestFail\"/>" >> $report
> diff --git a/tests/selftest/008 b/tests/selftest/008
> new file mode 100755
> index 00000000000000..db80ffe6f77339
> --- /dev/null
> +++ b/tests/selftest/008
> @@ -0,0 +1,20 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (c) 2024-2025 Oracle.  All Rights Reserved.
> +#
> +# FS QA Test 008
> +#
> +# Test mount failure capture.
> +#
> +. ./common/preamble
> +_begin_fstest selftest
> +
> +_require_command "$WIPEFS_PROG" wipefs
> +_require_scratch
> +
> +$WIPEFS_PROG -a $SCRATCH_DEV
> +_scratch_mount &>> $seqres.full
Minor: Do you think adding some filtered messages from the captured
dmesg logs in the output will be helpful?  
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/selftest/008.out b/tests/selftest/008.out
> new file mode 100644
> index 00000000000000..aaff95f3f48372
> --- /dev/null
> +++ b/tests/selftest/008.out
> @@ -0,0 +1 @@
> +QA output created by 008
>
Darrick J. Wong Jan. 6, 2025, 11:52 p.m. UTC | #2
On Mon, Jan 06, 2025 at 04:48:34PM +0530, Nirjhar Roy wrote:
> On Tue, 2024-12-31 at 15:56 -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Capture the kernel output after a mount failure occurs.  If the test
> > itself fails, then keep the logging output for further diagnosis.
> > 
> > Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
> > ---
> >  check                  |   22 +++++++++++++++++++++-
> >  common/rc              |   26 +++++++++++++++++++++++++-
> >  common/report          |    8 ++++++++
> >  tests/selftest/008     |   20 ++++++++++++++++++++
> >  tests/selftest/008.out |    1 +
> >  5 files changed, 75 insertions(+), 2 deletions(-)
> >  create mode 100755 tests/selftest/008
> >  create mode 100644 tests/selftest/008.out
> > 
> > 
> > diff --git a/check b/check
> > index 9222cd7e4f8197..a46ea1a54d78bb 100755
> > --- a/check
> > +++ b/check
> > @@ -614,7 +614,7 @@ _stash_fail_loop_files() {
> >  	local seq_prefix="${REPORT_DIR}/${1}"
> >  	local cp_suffix="$2"
> >  
> > -	for i in ".full" ".dmesg" ".out.bad" ".notrun" ".core"
> > ".hints"; do
> > +	for i in ".full" ".dmesg" ".out.bad" ".notrun" ".core" ".hints"
> > ".mountfail"; do
> >  		rm -f "${seq_prefix}${i}${cp_suffix}"
> >  		if [ -f "${seq_prefix}${i}" ]; then
> >  			cp "${seq_prefix}${i}"
> > "${seq_prefix}${i}${cp_suffix}"
> > @@ -994,6 +994,7 @@ function run_section()
> >  				      echo -n "	$seqnum -- "
> >  			cat $seqres.notrun
> >  			tc_status="notrun"
> > +			rm -f "$seqres.mountfail?"
> >  			_stash_test_status "$seqnum" "$tc_status"
> >  
> >  			# Unmount the scratch fs so that we can wipe
> > the scratch
> > @@ -1053,6 +1054,7 @@ function run_section()
> >  		if [ ! -f $seq.out ]; then
> >  			_dump_err "no qualified output"
> >  			tc_status="fail"
> > +			rm -f "$seqres.mountfail?"
> >  			_stash_test_status "$seqnum" "$tc_status"
> >  			continue;
> >  		fi
> > @@ -1089,6 +1091,24 @@ function run_section()
> >  				rm -f $seqres.hints
> >  			fi
> >  		fi
> > +
> > +		if [ -f "$seqres.mountfail?" ]; then
> > +			if [ "$tc_status" = "fail" ]; then
> > +				# Let the user know if there were mount
> > +				# failures on a test that failed
> > because that
> > +				# could be interesting.
> > +				mv "$seqres.mountfail?"
> > "$seqres.mountfail"
> > +				_dump_err "check: possible mount
> > failures (see $seqres.mountfail)"
> > +				test -f $seqres.mountfail && \
> > +					maybe_compress_logfile
> > $seqres.mountfail $MAX_MOUNTFAIL_SIZE
> > +			else
> > +				# Don't retain mount failure logs for
> > tests
> > +				# that pass or were skipped because
> > some tests
> > +				# intentionally drive mount failures.
> > +				rm -f "$seqres.mountfail?"
> > +			fi
> > +		fi
> > +
> >  		_stash_test_status "$seqnum" "$tc_status"
> >  	done
> >  
> > diff --git a/common/rc b/common/rc
> > index d7dfb55bbbd7e1..0ede68eb912440 100644
> > --- a/common/rc
> > +++ b/common/rc
> > @@ -204,9 +204,33 @@ _get_hugepagesize()
> >  	awk '/Hugepagesize/ {print $2 * 1024}' /proc/meminfo
> >  }
> >  
> > +# Does dmesg have a --since flag?
> > +_dmesg_detect_since()
> > +{
> > +	if [ -z "$DMESG_HAS_SINCE" ]; then
> > +		test "$DMESG_HAS_SINCE" = "yes"
> > +		return
> > +	elif dmesg --help | grep -q -- --since; then
> > +		DMESG_HAS_SINCE=yes
> > +	else
> > +		DMESG_HAS_SINCE=no
> > +	fi
> > +}
> > +
> >  _mount()
> >  {
> > -    $MOUNT_PROG $*
> > +	$MOUNT_PROG $*
> > +	ret=$?
> > +	if [ "$ret" -ne 0 ]; then
> > +		echo "\"$MOUNT_PROG $*\" failed at $(date)" >>
> > "$seqres.mountfail?"
> > +		if _dmesg_detect_since; then
> > +			dmesg --since '30s ago' >> "$seqres.mountfail?"
> > +		else
> > +			dmesg | tail -n 100 >> "$seqres.mountfail?"
> Is it possible to grep for a mount failure message in dmesg and then
> capture the last n lines? Do you think that will be more accurate?

Alas no, because there's no standard mount failure log message for us to
latch onto.

> Also, do you think it is useful to make this 100 configurable instead
> of hardcoding? 

I suppose, but why do you need more than 100?

> > +		fi
> > +	fi
> > +
> > +	return $ret
> >  }
> >  
> >  # Call _mount to do mount operation but also save mountpoint to
> > diff --git a/common/report b/common/report
> > index 0e91e481f9725a..b57697f76dafb2 100644
> > --- a/common/report
> > +++ b/common/report
> > @@ -199,6 +199,7 @@ _xunit_make_testcase_report()
> >  		local out_src="${SRC_DIR}/${test_name}.out"
> >  		local full_file="${REPORT_DIR}/${test_name}.full"
> >  		local dmesg_file="${REPORT_DIR}/${test_name}.dmesg"
> > +		local
> > mountfail_file="${REPORT_DIR}/${test_name}.mountfail"
> >  		local outbad_file="${REPORT_DIR}/${test_name}.out.bad"
> >  		if [ -z "$_err_msg" ]; then
> >  			_err_msg="Test $test_name failed, reason
> > unknown"
> > @@ -225,6 +226,13 @@ _xunit_make_testcase_report()
> >  			printf ']]>\n'	>>$report
> >  			echo -e "\t\t</system-err>" >> $report
> >  		fi
> > +		if [ -z "$quiet" -a -f "$mountfail_file" ]; then
> > +			echo -e "\t\t<mount-failure>" >> $report
> > +			printf	'<![CDATA[\n' >>$report
> > +			cat "$mountfail_file" | tr -dc
> > '[:print:][:space:]' | encode_cdata >>$report
> > +			printf ']]>\n'	>>$report
> > +			echo -e "\t\t</mount-failure>" >> $report
> > +		fi
> >  		;;
> >  	*)
> >  		echo -e "\t\t<failure message=\"Unknown
> > test_status=$test_status\" type=\"TestFail\"/>" >> $report
> > diff --git a/tests/selftest/008 b/tests/selftest/008
> > new file mode 100755
> > index 00000000000000..db80ffe6f77339
> > --- /dev/null
> > +++ b/tests/selftest/008
> > @@ -0,0 +1,20 @@
> > +#! /bin/bash
> > +# SPDX-License-Identifier: GPL-2.0
> > +# Copyright (c) 2024-2025 Oracle.  All Rights Reserved.
> > +#
> > +# FS QA Test 008
> > +#
> > +# Test mount failure capture.
> > +#
> > +. ./common/preamble
> > +_begin_fstest selftest
> > +
> > +_require_command "$WIPEFS_PROG" wipefs
> > +_require_scratch
> > +
> > +$WIPEFS_PROG -a $SCRATCH_DEV
> > +_scratch_mount &>> $seqres.full
> Minor: Do you think adding some filtered messages from the captured
> dmesg logs in the output will be helpful?  

No, this test exists to make sure that the dmesg log is captured in
$RESULT_DIR.  We don't care about the mount(8) output.

--D

> > +
> > +# success, all done
> > +status=0
> > +exit
> > diff --git a/tests/selftest/008.out b/tests/selftest/008.out
> > new file mode 100644
> > index 00000000000000..aaff95f3f48372
> > --- /dev/null
> > +++ b/tests/selftest/008.out
> > @@ -0,0 +1 @@
> > +QA output created by 008
> > 
> 
>
Nirjhar Roy Jan. 13, 2025, 5:55 a.m. UTC | #3
On Mon, 2025-01-06 at 15:52 -0800, Darrick J. Wong wrote:
> On Mon, Jan 06, 2025 at 04:48:34PM +0530, Nirjhar Roy wrote:
> > On Tue, 2024-12-31 at 15:56 -0800, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <djwong@kernel.org>
> > > 
> > > Capture the kernel output after a mount failure occurs.  If the
> > > test
> > > itself fails, then keep the logging output for further diagnosis.
> > > 
> > > Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
> > > ---
> > >  check                  |   22 +++++++++++++++++++++-
> > >  common/rc              |   26 +++++++++++++++++++++++++-
> > >  common/report          |    8 ++++++++
> > >  tests/selftest/008     |   20 ++++++++++++++++++++
> > >  tests/selftest/008.out |    1 +
> > >  5 files changed, 75 insertions(+), 2 deletions(-)
> > >  create mode 100755 tests/selftest/008
> > >  create mode 100644 tests/selftest/008.out
> > > 
> > > 
> > > diff --git a/check b/check
> > > index 9222cd7e4f8197..a46ea1a54d78bb 100755
> > > --- a/check
> > > +++ b/check
> > > @@ -614,7 +614,7 @@ _stash_fail_loop_files() {
> > >  	local seq_prefix="${REPORT_DIR}/${1}"
> > >  	local cp_suffix="$2"
> > >  
> > > -	for i in ".full" ".dmesg" ".out.bad" ".notrun" ".core"
> > > ".hints"; do
> > > +	for i in ".full" ".dmesg" ".out.bad" ".notrun" ".core" ".hints"
> > > ".mountfail"; do
> > >  		rm -f "${seq_prefix}${i}${cp_suffix}"
> > >  		if [ -f "${seq_prefix}${i}" ]; then
> > >  			cp "${seq_prefix}${i}"
> > > "${seq_prefix}${i}${cp_suffix}"
> > > @@ -994,6 +994,7 @@ function run_section()
> > >  				      echo -n "	$seqnum -- "
> > >  			cat $seqres.notrun
> > >  			tc_status="notrun"
> > > +			rm -f "$seqres.mountfail?"
> > >  			_stash_test_status "$seqnum" "$tc_status"
> > >  
> > >  			# Unmount the scratch fs so that we can wipe
> > > the scratch
> > > @@ -1053,6 +1054,7 @@ function run_section()
> > >  		if [ ! -f $seq.out ]; then
> > >  			_dump_err "no qualified output"
> > >  			tc_status="fail"
> > > +			rm -f "$seqres.mountfail?"
> > >  			_stash_test_status "$seqnum" "$tc_status"
> > >  			continue;
> > >  		fi
> > > @@ -1089,6 +1091,24 @@ function run_section()
> > >  				rm -f $seqres.hints
> > >  			fi
> > >  		fi
> > > +
> > > +		if [ -f "$seqres.mountfail?" ]; then
> > > +			if [ "$tc_status" = "fail" ]; then
> > > +				# Let the user know if there were mount
> > > +				# failures on a test that failed
> > > because that
> > > +				# could be interesting.
> > > +				mv "$seqres.mountfail?"
> > > "$seqres.mountfail"
> > > +				_dump_err "check: possible mount
> > > failures (see $seqres.mountfail)"
> > > +				test -f $seqres.mountfail && \
> > > +					maybe_compress_logfile
> > > $seqres.mountfail $MAX_MOUNTFAIL_SIZE
> > > +			else
> > > +				# Don't retain mount failure logs for
> > > tests
> > > +				# that pass or were skipped because
> > > some tests
> > > +				# intentionally drive mount failures.
> > > +				rm -f "$seqres.mountfail?"
> > > +			fi
> > > +		fi
> > > +
> > >  		_stash_test_status "$seqnum" "$tc_status"
> > >  	done
> > >  
> > > diff --git a/common/rc b/common/rc
> > > index d7dfb55bbbd7e1..0ede68eb912440 100644
> > > --- a/common/rc
> > > +++ b/common/rc
> > > @@ -204,9 +204,33 @@ _get_hugepagesize()
> > >  	awk '/Hugepagesize/ {print $2 * 1024}' /proc/meminfo
> > >  }
> > >  
> > > +# Does dmesg have a --since flag?
> > > +_dmesg_detect_since()
> > > +{
> > > +	if [ -z "$DMESG_HAS_SINCE" ]; then
> > > +		test "$DMESG_HAS_SINCE" = "yes"
> > > +		return
> > > +	elif dmesg --help | grep -q -- --since; then
> > > +		DMESG_HAS_SINCE=yes
> > > +	else
> > > +		DMESG_HAS_SINCE=no
> > > +	fi
> > > +}
> > > +
> > >  _mount()
> > >  {
> > > -    $MOUNT_PROG $*
> > > +	$MOUNT_PROG $*
> > > +	ret=$?
> > > +	if [ "$ret" -ne 0 ]; then
> > > +		echo "\"$MOUNT_PROG $*\" failed at $(date)" >>
> > > "$seqres.mountfail?"
> > > +		if _dmesg_detect_since; then
> > > +			dmesg --since '30s ago' >> "$seqres.mountfail?"
> > > +		else
> > > +			dmesg | tail -n 100 >> "$seqres.mountfail?"
> > Is it possible to grep for a mount failure message in dmesg and
> > then
> > capture the last n lines? Do you think that will be more accurate?
> 
> Alas no, because there's no standard mount failure log message for us
> to
> latch onto.
Okay makes sense. 
> 
> > Also, do you think it is useful to make this 100 configurable
> > instead
> > of hardcoding? 
> 
> I suppose, but why do you need more than 100?
So my thought behind this is that in case, the dmesg gets cluttered
with noisy logs from other processes. No hard preferences though. 
> 
> > > +		fi
> > > +	fi
> > > +
> > > +	return $ret
> > >  }
> > >  
> > >  # Call _mount to do mount operation but also save mountpoint to
> > > diff --git a/common/report b/common/report
> > > index 0e91e481f9725a..b57697f76dafb2 100644
> > > --- a/common/report
> > > +++ b/common/report
> > > @@ -199,6 +199,7 @@ _xunit_make_testcase_report()
> > >  		local out_src="${SRC_DIR}/${test_name}.out"
> > >  		local full_file="${REPORT_DIR}/${test_name}.full"
> > >  		local dmesg_file="${REPORT_DIR}/${test_name}.dmesg"
> > > +		local
> > > mountfail_file="${REPORT_DIR}/${test_name}.mountfail"
> > >  		local outbad_file="${REPORT_DIR}/${test_name}.out.bad"
> > >  		if [ -z "$_err_msg" ]; then
> > >  			_err_msg="Test $test_name failed, reason
> > > unknown"
> > > @@ -225,6 +226,13 @@ _xunit_make_testcase_report()
> > >  			printf ']]>\n'	>>$report
> > >  			echo -e "\t\t</system-err>" >> $report
> > >  		fi
> > > +		if [ -z "$quiet" -a -f "$mountfail_file" ]; then
> > > +			echo -e "\t\t<mount-failure>" >> $report
> > > +			printf	'<![CDATA[\n' >>$report
> > > +			cat "$mountfail_file" | tr -dc
> > > '[:print:][:space:]' | encode_cdata >>$report
> > > +			printf ']]>\n'	>>$report
> > > +			echo -e "\t\t</mount-failure>" >> $report
> > > +		fi
> > >  		;;
> > >  	*)
> > >  		echo -e "\t\t<failure message=\"Unknown
> > > test_status=$test_status\" type=\"TestFail\"/>" >> $report
> > > diff --git a/tests/selftest/008 b/tests/selftest/008
> > > new file mode 100755
> > > index 00000000000000..db80ffe6f77339
> > > --- /dev/null
> > > +++ b/tests/selftest/008
> > > @@ -0,0 +1,20 @@
> > > +#! /bin/bash
> > > +# SPDX-License-Identifier: GPL-2.0
> > > +# Copyright (c) 2024-2025 Oracle.  All Rights Reserved.
> > > +#
> > > +# FS QA Test 008
> > > +#
> > > +# Test mount failure capture.
> > > +#
> > > +. ./common/preamble
> > > +_begin_fstest selftest
> > > +
> > > +_require_command "$WIPEFS_PROG" wipefs
> > > +_require_scratch
> > > +
> > > +$WIPEFS_PROG -a $SCRATCH_DEV
> > > +_scratch_mount &>> $seqres.full
> > Minor: Do you think adding some filtered messages from the captured
> > dmesg logs in the output will be helpful?  
> 
> No, this test exists to make sure that the dmesg log is captured in
> $RESULT_DIR.  We don't care about the mount(8) output.
> 
> --D
Okay, got it.
--NR 
> 
> > > +
> > > +# success, all done
> > > +status=0
> > > +exit
> > > diff --git a/tests/selftest/008.out b/tests/selftest/008.out
> > > new file mode 100644
> > > index 00000000000000..aaff95f3f48372
> > > --- /dev/null
> > > +++ b/tests/selftest/008.out
> > > @@ -0,0 +1 @@
> > > +QA output created by 008
> > >
diff mbox series

Patch

diff --git a/check b/check
index 9222cd7e4f8197..a46ea1a54d78bb 100755
--- a/check
+++ b/check
@@ -614,7 +614,7 @@  _stash_fail_loop_files() {
 	local seq_prefix="${REPORT_DIR}/${1}"
 	local cp_suffix="$2"
 
-	for i in ".full" ".dmesg" ".out.bad" ".notrun" ".core" ".hints"; do
+	for i in ".full" ".dmesg" ".out.bad" ".notrun" ".core" ".hints" ".mountfail"; do
 		rm -f "${seq_prefix}${i}${cp_suffix}"
 		if [ -f "${seq_prefix}${i}" ]; then
 			cp "${seq_prefix}${i}" "${seq_prefix}${i}${cp_suffix}"
@@ -994,6 +994,7 @@  function run_section()
 				      echo -n "	$seqnum -- "
 			cat $seqres.notrun
 			tc_status="notrun"
+			rm -f "$seqres.mountfail?"
 			_stash_test_status "$seqnum" "$tc_status"
 
 			# Unmount the scratch fs so that we can wipe the scratch
@@ -1053,6 +1054,7 @@  function run_section()
 		if [ ! -f $seq.out ]; then
 			_dump_err "no qualified output"
 			tc_status="fail"
+			rm -f "$seqres.mountfail?"
 			_stash_test_status "$seqnum" "$tc_status"
 			continue;
 		fi
@@ -1089,6 +1091,24 @@  function run_section()
 				rm -f $seqres.hints
 			fi
 		fi
+
+		if [ -f "$seqres.mountfail?" ]; then
+			if [ "$tc_status" = "fail" ]; then
+				# Let the user know if there were mount
+				# failures on a test that failed because that
+				# could be interesting.
+				mv "$seqres.mountfail?" "$seqres.mountfail"
+				_dump_err "check: possible mount failures (see $seqres.mountfail)"
+				test -f $seqres.mountfail && \
+					maybe_compress_logfile $seqres.mountfail $MAX_MOUNTFAIL_SIZE
+			else
+				# Don't retain mount failure logs for tests
+				# that pass or were skipped because some tests
+				# intentionally drive mount failures.
+				rm -f "$seqres.mountfail?"
+			fi
+		fi
+
 		_stash_test_status "$seqnum" "$tc_status"
 	done
 
diff --git a/common/rc b/common/rc
index d7dfb55bbbd7e1..0ede68eb912440 100644
--- a/common/rc
+++ b/common/rc
@@ -204,9 +204,33 @@  _get_hugepagesize()
 	awk '/Hugepagesize/ {print $2 * 1024}' /proc/meminfo
 }
 
+# Does dmesg have a --since flag?
+_dmesg_detect_since()
+{
+	if [ -z "$DMESG_HAS_SINCE" ]; then
+		test "$DMESG_HAS_SINCE" = "yes"
+		return
+	elif dmesg --help | grep -q -- --since; then
+		DMESG_HAS_SINCE=yes
+	else
+		DMESG_HAS_SINCE=no
+	fi
+}
+
 _mount()
 {
-    $MOUNT_PROG $*
+	$MOUNT_PROG $*
+	ret=$?
+	if [ "$ret" -ne 0 ]; then
+		echo "\"$MOUNT_PROG $*\" failed at $(date)" >> "$seqres.mountfail?"
+		if _dmesg_detect_since; then
+			dmesg --since '30s ago' >> "$seqres.mountfail?"
+		else
+			dmesg | tail -n 100 >> "$seqres.mountfail?"
+		fi
+	fi
+
+	return $ret
 }
 
 # Call _mount to do mount operation but also save mountpoint to
diff --git a/common/report b/common/report
index 0e91e481f9725a..b57697f76dafb2 100644
--- a/common/report
+++ b/common/report
@@ -199,6 +199,7 @@  _xunit_make_testcase_report()
 		local out_src="${SRC_DIR}/${test_name}.out"
 		local full_file="${REPORT_DIR}/${test_name}.full"
 		local dmesg_file="${REPORT_DIR}/${test_name}.dmesg"
+		local mountfail_file="${REPORT_DIR}/${test_name}.mountfail"
 		local outbad_file="${REPORT_DIR}/${test_name}.out.bad"
 		if [ -z "$_err_msg" ]; then
 			_err_msg="Test $test_name failed, reason unknown"
@@ -225,6 +226,13 @@  _xunit_make_testcase_report()
 			printf ']]>\n'	>>$report
 			echo -e "\t\t</system-err>" >> $report
 		fi
+		if [ -z "$quiet" -a -f "$mountfail_file" ]; then
+			echo -e "\t\t<mount-failure>" >> $report
+			printf	'<![CDATA[\n' >>$report
+			cat "$mountfail_file" | tr -dc '[:print:][:space:]' | encode_cdata >>$report
+			printf ']]>\n'	>>$report
+			echo -e "\t\t</mount-failure>" >> $report
+		fi
 		;;
 	*)
 		echo -e "\t\t<failure message=\"Unknown test_status=$test_status\" type=\"TestFail\"/>" >> $report
diff --git a/tests/selftest/008 b/tests/selftest/008
new file mode 100755
index 00000000000000..db80ffe6f77339
--- /dev/null
+++ b/tests/selftest/008
@@ -0,0 +1,20 @@ 
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2024-2025 Oracle.  All Rights Reserved.
+#
+# FS QA Test 008
+#
+# Test mount failure capture.
+#
+. ./common/preamble
+_begin_fstest selftest
+
+_require_command "$WIPEFS_PROG" wipefs
+_require_scratch
+
+$WIPEFS_PROG -a $SCRATCH_DEV
+_scratch_mount &>> $seqres.full
+
+# success, all done
+status=0
+exit
diff --git a/tests/selftest/008.out b/tests/selftest/008.out
new file mode 100644
index 00000000000000..aaff95f3f48372
--- /dev/null
+++ b/tests/selftest/008.out
@@ -0,0 +1 @@ 
+QA output created by 008