diff mbox series

[blktests] tests/nvme: add tests for error logging

Message ID 20220429220946.22099-1-alan.adamson@oracle.com (mailing list archive)
State New, archived
Headers show
Series [blktests] tests/nvme: add tests for error logging | expand

Commit Message

Alan Adamson April 29, 2022, 10:09 p.m. UTC
Test nvme error logging by injecting errors. Kernel must have FAULT_INJECTION
and FAULT_INJECTION_DEBUG_FS configured to use error injector. Tests can be
run with or without NVME_VERBOSE_ERRORS configured.

These test verify the functionality delivered by the follow commit:
        nvme: add verbose error logging

Signed-off-by: Alan Adamson <alan.adamson@oracle.com>
Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
---
 tests/nvme/039     | 174 +++++++++++++++++++++++++++++++++++++++++++++
 tests/nvme/039.out |   7 ++
 2 files changed, 181 insertions(+)
 create mode 100755 tests/nvme/039
 create mode 100644 tests/nvme/039.out

Comments

Shinichiro Kawasaki May 9, 2022, 10:52 a.m. UTC | #1
Hello Alan,

I ran the test case this patch adds and confirmed it works as expected. Good.
I found some points to improve in the patch. Please find my comments in line.

On Apr 29, 2022 / 15:09, Alan Adamson wrote:
> Test nvme error logging by injecting errors. Kernel must have FAULT_INJECTION
> and FAULT_INJECTION_DEBUG_FS configured to use error injector. Tests can be
> run with or without NVME_VERBOSE_ERRORS configured.
> 
> These test verify the functionality delivered by the follow commit:
>         nvme: add verbose error logging

The commit was already up-streamed. It will be helpful to add git hash of the
commit.

> 
> Signed-off-by: Alan Adamson <alan.adamson@oracle.com>
> Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
> ---
>  tests/nvme/039     | 174 +++++++++++++++++++++++++++++++++++++++++++++
>  tests/nvme/039.out |   7 ++
>  2 files changed, 181 insertions(+)
>  create mode 100755 tests/nvme/039
>  create mode 100644 tests/nvme/039.out
> 
> diff --git a/tests/nvme/039 b/tests/nvme/039
> new file mode 100755
> index 000000000000..e30de0731247
> --- /dev/null
> +++ b/tests/nvme/039
> @@ -0,0 +1,174 @@
> +#!/bin/bash
> +# SPDX-License-Identifier: GPL-3.0+
> +# Copyright (C) 2022 Oracle and/or its affiliates
> +#
> +# Test nvme error logging by injecting errors. Kernel must have FAULT_INJECTION
> +# and FAULT_INJECTION_DEBUG_FS configured to use error injector. Tests can be
> +# run with or without NVME_VERBOSE_ERRORS configured.

It will be helpful to note the kernel commit here also.

> +
> +. tests/nvme/rc
> +DESCRIPTION="test error logging"
> +QUICK=1
> +
> +requires() {
> +	_nvme_requires
> +	_have_kernel_option FAULT_INJECTION && \
> +	    _have_kernel_option FAULT_INJECTION_DEBUG_FS
> +	_have_program dd && _have_program nvme _have_program sed

I think the line above is not required. dd and sed are so common and we can
assume they are available. And _nvme_requries checks the nvme command.

> +	_require_nvme_trtype_is_loop

Can't we run this test with transport type other than loop?

> +}
> +
> +save_err_inject_attr()
> +{
> +	ns_dev_verbose_save=$(cat /sys/kernel/debug/"${ns_dev}"/fault_inject/verbose)
> +	ns_dev_probability_save=$(cat /sys/kernel/debug/"${ns_dev}"/fault_inject/probability)
> +	ns_dev_dont_retry_save=$(cat /sys/kernel/debug/"${ns_dev}"/fault_inject/dont_retry)
> +	ns_dev_dont_status_save=$(cat /sys/kernel/debug/"${ns_dev}"/fault_inject/status)
> +	ns_dev_dont_times_save=$(cat /sys/kernel/debug/"${ns_dev}"/fault_inject/times)
> +	ctrl_dev_verbose_save=$(cat /sys/kernel/debug/"${ctrl_dev}"/fault_inject/verbose)
> +	ctrl_dev_probability_save=$(cat /sys/kernel/debug/"${ctrl_dev}"/fault_inject/probability)
> +	ctrl_dev_dont_retry_save=$(cat /sys/kernel/debug/"${ctrl_dev}"/fault_inject/dont_retry)
> +	ctrl_dev_dont_status_save=$(cat /sys/kernel/debug/"${ctrl_dev}"/fault_inject/status)
> +	ctrl_dev_dont_times_save=$(cat /sys/kernel/debug/"${ctrl_dev}"/fault_inject/times)
> +}
> +
> +restore_error_inject_attr()
> +{
> +	echo "${ns_dev_verbose_save}" > /sys/kernel/debug/"${ns_dev}"/fault_inject/verbose
> +	echo "${ns_dev_verbose_save}" > /sys/kernel/debug/"${ns_dev}"/fault_inject/verbose
> +	echo "${ns_dev_probability_save}" > /sys/kernel/debug/"${ns_dev}"/fault_inject/probability
> +	echo "${ns_dev_dont_retry_save}" > /sys/kernel/debug/"${ns_dev}"/fault_inject/dont_retry
> +	echo "${ns_dev_dont_status_save}" > /sys/kernel/debug/"${ns_dev}"/fault_inject/status
> +	echo "${ns_dev_dont_times_save}" > /sys/kernel/debug/"${ns_dev}"/fault_inject/times
> +	echo "${ctrl_dev_verbose_save}" > /sys/kernel/debug/"${ctrl_dev}"/fault_inject/verbose 
> +	echo "${ctrl_dev_probability_save}" > /sys/kernel/debug/"${ctrl_dev}"/fault_inject/probability
> +	echo "${ctrl_dev_dont_retry_save}" > /sys/kernel/debug/"${ctrl_dev}"/fault_inject/dont_retry
> +	echo "${ctrl_dev_dont_status_save}" > /sys/kernel/debug/"${ctrl_dev}"/fault_inject/status
> +	echo "${ctrl_dev_dont_times_save}" > /sys/kernel/debug/"${ctrl_dev}"/fault_inject/times
> +}

The two functions above repeat similar lengthy lines. How about to use
associative arrays to avoid the repetition?

declare -A NS_DEV_FAULT_INJECT_SAVE
declare -A CTRL_DEV_FAULT_INJECT_SAVE

save_err_inject_attr()
{
	local a

	for a in /sys/kernel/debug/"${ns_dev}"/fault_inject/*; do
		NS_DEV_FAULT_INJECT_SAVE[${a}]=$(<"${a}")
	done
	for a in /sys/kernel/debug/"${ctrl_dev}"/fault_inject/*; do
		CTRL_DEV_FAULT_INJECT_SAVE[${a}]=$(<"${a}")
	done
}

> +
> +set_verbose_prob_retry()
> +{
> +	echo 0 > /sys/kernel/debug/"$1"/fault_inject/verbose
> +	echo 100 > /sys/kernel/debug/"$1"/fault_inject/probability
> +	echo 1 > /sys/kernel/debug/"$1"/fault_inject/dont_retry
> +}
> +
> +set_status_time()
> +{
> +	echo "$1" > /sys/kernel/debug/"$3"/fault_inject/status
> +	echo "$2" > /sys/kernel/debug/"$3"/fault_inject/times
> +}
> +
> +inject_unrec_read_err()
> +{
> +	set_verbose_prob_retry "${ns_dev}"
> +#	Inject a 'Unrecovered Read Error' error on a READ

One liner comments in this patch have weird position of '#'. I suggest to place
it at same column offset as code.

> +	set_status_time 0x281 1 "${ns_dev}"
> +	dd if="${TEST_DEV}" of=/dev/null bs=512 count=1 iflag=direct 2> /dev/null 1>&2

Some lines in this patch exceeds 80 characters. I suggest to fold them into two
lines with backslashes.

> +	if ${nvme_verbose_errors}; then
> +		dmesg -t | tail -2 | grep "Unrecovered Read Error (" | \
> +		    sed 's/nvme.*://g'
> +	else
> +		dmesg -t | tail -2 | grep "Cmd(" | sed 's/I\/O Cmd/Read/g' | \
> +		    sed 's/I\/O Error/Unrecovered Read Error/g' | \
> +		    sed 's/nvme.*://g'
> +	fi
> +}
> +
> +inject_invalid_read_err()
> +{
> +#	Inject a valid invalid error status (0x375) on a READ
> +	set_status_time 0x375 1 "${ns_dev}"
> +	dd if="${TEST_DEV}" of=/dev/null bs=512 count=1 iflag=direct 2> /dev/null 1>&2
> +	if ${nvme_verbose_errors}; then
> +		dmesg -t | tail -2 | grep "Unknown (" | \
> +		    sed 's/nvme.*://g'
> +	else
> +		dmesg -t | tail -2 | grep "Cmd(" | sed 's/I\/O Cmd/Read/g' | \
> +		    sed 's/I\/O Error/Unknown/g' | \
> +		    sed 's/nvme.*://g'
> +	fi
> +}
> +
> +inject_write_fault()
> +{
> +#	Inject a 'Write Fault' error on a WRITE
> +	set_status_time 0x280 1 "${ns_dev}"
> +	dd if=/dev/zero of="${TEST_DEV}" bs=512 count=1 oflag=direct 2> /dev/null 1>&2
> +	if ${nvme_verbose_errors}; then
> +		dmesg -t | tail -2 | grep "Write Fault (" | \
> +		    sed 's/nvme.*://g'
> +	else
> +		dmesg -t | tail -2 | grep "Cmd(" | sed 's/I\/O Cmd/Write/g' | \
> +		    sed 's/I\/O Error/Write Fault/g' | \
> +		    sed 's/nvme.*://g'
> +	fi
> +}
> +
> +inject_id_admin()
> +{
> +#	Inject a valid (Identify) Admin command
> +	set_status_time 0x286 1000 "${ctrl_dev}"
> +	nvme admin-passthru /dev/"${ctrl_dev}" --opcode=0x06 --data-len=4096 --cdw10=1 -r 2> /dev/null 1>&2
> +	if ${nvme_verbose_errors}; then
> +		dmesg -t | tail -1 | grep "Access Denied (" | \
> +		    sed 's/nvme.*://g'
> +	else
> +		dmesg -t | tail -1 | grep "Admin Cmd(" | sed 's/Admin Cmd/Identify/g' | \
> +		    sed 's/I\/O Error/Access Denied/g' | \
> +		    sed 's/nvme.*://g'
> +	fi
> +}
> +
> +inject_invalid_cmd()
> +{
> +#	Inject an invalid command (0x96)
> +	set_status_time 0x1 1 "${ctrl_dev}"
> +	nvme admin-passthru /dev/"${ctrl_dev}" --opcode=0x96 --data-len=4096 --cdw10=1 -r 2> /dev/null 1>&2
> +	if ${nvme_verbose_errors}; then
> +		dmesg -t | tail -1 | grep "Invalid Command Opcode (" | \
> +		    sed 's/nvme.*://g'
> +	else
> +		dmesg -t | tail -1 | grep "Admin Cmd(" | sed 's/Admin Cmd/Unknown/g' | \
> +		    sed 's/I\/O Error/Invalid Command Opcode/g' | \
> +		    sed 's/nvme.*://g'
> +	fi
> +}
> +
> +test_device() {
> +	local nvme_verbose_errors;
> +	local ns_dev;
> +	local ctrl_dev;

The semicolons above are not required. Same for nvme_verbose_errors substitues
below.

> +
> +	echo "Running ${TEST_NAME}"
> +
> +	if _have_kernel_option NVME_VERBOSE_ERRORS; then
> +		nvme_verbose_errors=true;
> +	else
> +		unset SKIP_REASON
> +		nvme_verbose_errors=false;
> +	fi
> +
> +	ns_dev=$(echo "${TEST_DEV}" | sed 's/\/dev\///g')
> +	ctrl_dev=$(echo "${TEST_DEV}" | sed 's/\/dev\///g' | sed 's/n[0-9]*//2')

The two lines above works ok. Just FYI, they can be simplified with bash
features:

	ns_dev=${TEST_DEV##*/}
	ctrl_dev=${ns_dev%n*}

> +
> +#	Save Error Injector Attributes
> +	save_err_inject_attr

The function name is self-descriptive. The comment above is not so meaningful.

> +
> +	inject_unrec_read_err
> +
> +	inject_invalid_read_err
> +
> +	inject_write_fault
> +
> +	set_verbose_prob_retry "${ctrl_dev}"

set_verbose_prob_retry() is called for ctrl_dev in test_device(), but it is
called for ns_dev in inject_unrec_read_err(). This does not look consistent.
It would be the better to call set_verbose_prob_retry() only in test_device(),
probably. Also, it would be good to provide ns_dev or ctrl_dev to inject_*()
functions as an argument to clarify which device the functions use.

> +	inject_id_admin
> +
> +	inject_invalid_cmd
> +
> +#	Restore Error Injector Attributes
> +	restore_error_inject_attr

Again, the comment above is not so meaningful. And helper functions in this
patch abbreviates the word 'error' to 'err', but only the function above spells
it out and looks weird for me.

> +
> +	echo "Test complete"
> +}
> diff --git a/tests/nvme/039.out b/tests/nvme/039.out
> new file mode 100644
> index 000000000000..162935eb1d7b
> --- /dev/null
> +++ b/tests/nvme/039.out
> @@ -0,0 +1,7 @@
> +Running nvme/039
> + Read(0x2) @ LBA 0, 1 blocks, Unrecovered Read Error (sct 0x2 / sc 0x81) DNR 
> + Read(0x2) @ LBA 0, 1 blocks, Unknown (sct 0x3 / sc 0x75) DNR 
> + Write(0x1) @ LBA 0, 1 blocks, Write Fault (sct 0x2 / sc 0x80) DNR 
> + Identify(0x6), Access Denied (sct 0x2 / sc 0x86) DNR 
> + Unknown(0x96), Invalid Command Opcode (sct 0x0 / sc 0x1) DNR 
> +Test complete
> -- 
> 2.27.0
>
diff mbox series

Patch

diff --git a/tests/nvme/039 b/tests/nvme/039
new file mode 100755
index 000000000000..e30de0731247
--- /dev/null
+++ b/tests/nvme/039
@@ -0,0 +1,174 @@ 
+#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0+
+# Copyright (C) 2022 Oracle and/or its affiliates
+#
+# Test nvme error logging by injecting errors. Kernel must have FAULT_INJECTION
+# and FAULT_INJECTION_DEBUG_FS configured to use error injector. Tests can be
+# run with or without NVME_VERBOSE_ERRORS configured.
+
+. tests/nvme/rc
+DESCRIPTION="test error logging"
+QUICK=1
+
+requires() {
+	_nvme_requires
+	_have_kernel_option FAULT_INJECTION && \
+	    _have_kernel_option FAULT_INJECTION_DEBUG_FS
+	_have_program dd && _have_program nvme _have_program sed
+	_require_nvme_trtype_is_loop
+}
+
+save_err_inject_attr()
+{
+	ns_dev_verbose_save=$(cat /sys/kernel/debug/"${ns_dev}"/fault_inject/verbose)
+	ns_dev_probability_save=$(cat /sys/kernel/debug/"${ns_dev}"/fault_inject/probability)
+	ns_dev_dont_retry_save=$(cat /sys/kernel/debug/"${ns_dev}"/fault_inject/dont_retry)
+	ns_dev_dont_status_save=$(cat /sys/kernel/debug/"${ns_dev}"/fault_inject/status)
+	ns_dev_dont_times_save=$(cat /sys/kernel/debug/"${ns_dev}"/fault_inject/times)
+	ctrl_dev_verbose_save=$(cat /sys/kernel/debug/"${ctrl_dev}"/fault_inject/verbose)
+	ctrl_dev_probability_save=$(cat /sys/kernel/debug/"${ctrl_dev}"/fault_inject/probability)
+	ctrl_dev_dont_retry_save=$(cat /sys/kernel/debug/"${ctrl_dev}"/fault_inject/dont_retry)
+	ctrl_dev_dont_status_save=$(cat /sys/kernel/debug/"${ctrl_dev}"/fault_inject/status)
+	ctrl_dev_dont_times_save=$(cat /sys/kernel/debug/"${ctrl_dev}"/fault_inject/times)
+}
+
+restore_error_inject_attr()
+{
+	echo "${ns_dev_verbose_save}" > /sys/kernel/debug/"${ns_dev}"/fault_inject/verbose
+	echo "${ns_dev_verbose_save}" > /sys/kernel/debug/"${ns_dev}"/fault_inject/verbose
+	echo "${ns_dev_probability_save}" > /sys/kernel/debug/"${ns_dev}"/fault_inject/probability
+	echo "${ns_dev_dont_retry_save}" > /sys/kernel/debug/"${ns_dev}"/fault_inject/dont_retry
+	echo "${ns_dev_dont_status_save}" > /sys/kernel/debug/"${ns_dev}"/fault_inject/status
+	echo "${ns_dev_dont_times_save}" > /sys/kernel/debug/"${ns_dev}"/fault_inject/times
+	echo "${ctrl_dev_verbose_save}" > /sys/kernel/debug/"${ctrl_dev}"/fault_inject/verbose 
+	echo "${ctrl_dev_probability_save}" > /sys/kernel/debug/"${ctrl_dev}"/fault_inject/probability
+	echo "${ctrl_dev_dont_retry_save}" > /sys/kernel/debug/"${ctrl_dev}"/fault_inject/dont_retry
+	echo "${ctrl_dev_dont_status_save}" > /sys/kernel/debug/"${ctrl_dev}"/fault_inject/status
+	echo "${ctrl_dev_dont_times_save}" > /sys/kernel/debug/"${ctrl_dev}"/fault_inject/times
+}
+
+set_verbose_prob_retry()
+{
+	echo 0 > /sys/kernel/debug/"$1"/fault_inject/verbose
+	echo 100 > /sys/kernel/debug/"$1"/fault_inject/probability
+	echo 1 > /sys/kernel/debug/"$1"/fault_inject/dont_retry
+}
+
+set_status_time()
+{
+	echo "$1" > /sys/kernel/debug/"$3"/fault_inject/status
+	echo "$2" > /sys/kernel/debug/"$3"/fault_inject/times
+}
+
+inject_unrec_read_err()
+{
+	set_verbose_prob_retry "${ns_dev}"
+#	Inject a 'Unrecovered Read Error' error on a READ
+	set_status_time 0x281 1 "${ns_dev}"
+	dd if="${TEST_DEV}" of=/dev/null bs=512 count=1 iflag=direct 2> /dev/null 1>&2
+	if ${nvme_verbose_errors}; then
+		dmesg -t | tail -2 | grep "Unrecovered Read Error (" | \
+		    sed 's/nvme.*://g'
+	else
+		dmesg -t | tail -2 | grep "Cmd(" | sed 's/I\/O Cmd/Read/g' | \
+		    sed 's/I\/O Error/Unrecovered Read Error/g' | \
+		    sed 's/nvme.*://g'
+	fi
+}
+
+inject_invalid_read_err()
+{
+#	Inject a valid invalid error status (0x375) on a READ
+	set_status_time 0x375 1 "${ns_dev}"
+	dd if="${TEST_DEV}" of=/dev/null bs=512 count=1 iflag=direct 2> /dev/null 1>&2
+	if ${nvme_verbose_errors}; then
+		dmesg -t | tail -2 | grep "Unknown (" | \
+		    sed 's/nvme.*://g'
+	else
+		dmesg -t | tail -2 | grep "Cmd(" | sed 's/I\/O Cmd/Read/g' | \
+		    sed 's/I\/O Error/Unknown/g' | \
+		    sed 's/nvme.*://g'
+	fi
+}
+
+inject_write_fault()
+{
+#	Inject a 'Write Fault' error on a WRITE
+	set_status_time 0x280 1 "${ns_dev}"
+	dd if=/dev/zero of="${TEST_DEV}" bs=512 count=1 oflag=direct 2> /dev/null 1>&2
+	if ${nvme_verbose_errors}; then
+		dmesg -t | tail -2 | grep "Write Fault (" | \
+		    sed 's/nvme.*://g'
+	else
+		dmesg -t | tail -2 | grep "Cmd(" | sed 's/I\/O Cmd/Write/g' | \
+		    sed 's/I\/O Error/Write Fault/g' | \
+		    sed 's/nvme.*://g'
+	fi
+}
+
+inject_id_admin()
+{
+#	Inject a valid (Identify) Admin command
+	set_status_time 0x286 1000 "${ctrl_dev}"
+	nvme admin-passthru /dev/"${ctrl_dev}" --opcode=0x06 --data-len=4096 --cdw10=1 -r 2> /dev/null 1>&2
+	if ${nvme_verbose_errors}; then
+		dmesg -t | tail -1 | grep "Access Denied (" | \
+		    sed 's/nvme.*://g'
+	else
+		dmesg -t | tail -1 | grep "Admin Cmd(" | sed 's/Admin Cmd/Identify/g' | \
+		    sed 's/I\/O Error/Access Denied/g' | \
+		    sed 's/nvme.*://g'
+	fi
+}
+
+inject_invalid_cmd()
+{
+#	Inject an invalid command (0x96)
+	set_status_time 0x1 1 "${ctrl_dev}"
+	nvme admin-passthru /dev/"${ctrl_dev}" --opcode=0x96 --data-len=4096 --cdw10=1 -r 2> /dev/null 1>&2
+	if ${nvme_verbose_errors}; then
+		dmesg -t | tail -1 | grep "Invalid Command Opcode (" | \
+		    sed 's/nvme.*://g'
+	else
+		dmesg -t | tail -1 | grep "Admin Cmd(" | sed 's/Admin Cmd/Unknown/g' | \
+		    sed 's/I\/O Error/Invalid Command Opcode/g' | \
+		    sed 's/nvme.*://g'
+	fi
+}
+
+test_device() {
+	local nvme_verbose_errors;
+	local ns_dev;
+	local ctrl_dev;
+
+	echo "Running ${TEST_NAME}"
+
+	if _have_kernel_option NVME_VERBOSE_ERRORS; then
+		nvme_verbose_errors=true;
+	else
+		unset SKIP_REASON
+		nvme_verbose_errors=false;
+	fi
+
+	ns_dev=$(echo "${TEST_DEV}" | sed 's/\/dev\///g')
+	ctrl_dev=$(echo "${TEST_DEV}" | sed 's/\/dev\///g' | sed 's/n[0-9]*//2')
+
+#	Save Error Injector Attributes
+	save_err_inject_attr
+
+	inject_unrec_read_err
+
+	inject_invalid_read_err
+
+	inject_write_fault
+
+	set_verbose_prob_retry "${ctrl_dev}"
+	inject_id_admin
+
+	inject_invalid_cmd
+
+#	Restore Error Injector Attributes
+	restore_error_inject_attr
+
+	echo "Test complete"
+}
diff --git a/tests/nvme/039.out b/tests/nvme/039.out
new file mode 100644
index 000000000000..162935eb1d7b
--- /dev/null
+++ b/tests/nvme/039.out
@@ -0,0 +1,7 @@ 
+Running nvme/039
+ Read(0x2) @ LBA 0, 1 blocks, Unrecovered Read Error (sct 0x2 / sc 0x81) DNR 
+ Read(0x2) @ LBA 0, 1 blocks, Unknown (sct 0x3 / sc 0x75) DNR 
+ Write(0x1) @ LBA 0, 1 blocks, Write Fault (sct 0x2 / sc 0x80) DNR 
+ Identify(0x6), Access Denied (sct 0x2 / sc 0x86) DNR 
+ Unknown(0x96), Invalid Command Opcode (sct 0x0 / sc 0x1) DNR 
+Test complete