diff mbox series

[ndctl,v11,7/7] cxl/test: add cxl-poison.sh unit test

Message ID 24c1f2ec413f92e8e6e8817b3d4d55f5bb142849.1710386468.git.alison.schofield@intel.com (mailing list archive)
State Superseded
Headers show
Series Support poison list retrieval | expand

Commit Message

Alison Schofield March 14, 2024, 4:05 a.m. UTC
From: Alison Schofield <alison.schofield@intel.com>

Exercise cxl list, libcxl, and driver pieces of the get poison list
pathway. Inject and clear poison using debugfs and use cxl-cli to
read the poison list by memdev and by region.

Signed-off-by: Alison Schofield <alison.schofield@intel.com>
---
 test/cxl-poison.sh | 137 +++++++++++++++++++++++++++++++++++++++++++++
 test/meson.build   |   2 +
 2 files changed, 139 insertions(+)
 create mode 100644 test/cxl-poison.sh

Comments

Dave Jiang March 15, 2024, 5:03 p.m. UTC | #1
On 3/13/24 9:05 PM, alison.schofield@intel.com wrote:
> From: Alison Schofield <alison.schofield@intel.com>
> 
> Exercise cxl list, libcxl, and driver pieces of the get poison list
> pathway. Inject and clear poison using debugfs and use cxl-cli to
> read the poison list by memdev and by region.
> 
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> ---
>  test/cxl-poison.sh | 137 +++++++++++++++++++++++++++++++++++++++++++++
>  test/meson.build   |   2 +
>  2 files changed, 139 insertions(+)
>  create mode 100644 test/cxl-poison.sh
> 
> diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh
> new file mode 100644
> index 000000000000..af2e9dcd1a11
> --- /dev/null
> +++ b/test/cxl-poison.sh
> @@ -0,0 +1,137 @@
> +#!/bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (C) 2023 Intel Corporation. All rights reserved.
> +
> +. "$(dirname "$0")"/common
> +
> +rc=77
> +
> +set -ex
> +
> +trap 'err $LINENO' ERR
> +
> +check_prereq "jq"
> +
> +modprobe -r cxl_test
> +modprobe cxl_test
> +
> +rc=1
> +
> +# THEORY OF OPERATION: Exercise cxl-cli and cxl driver ability to
> +# inject, clear, and get the poison list. Do it by memdev and by region.
> +
> +find_memdev()
> +{
> +	readarray -t capable_mems < <("$CXL" list -b "$CXL_TEST_BUS" -M |
> +		jq -r ".[] | select(.pmem_size != null) |
> +		select(.ram_size != null) | .memdev")
> +
> +	if [ ${#capable_mems[@]} == 0 ]; then
> +		echo "no memdevs found for test"
> +		err "$LINENO"
> +	fi
> +
> +	memdev=${capable_mems[0]}
> +}
> +
> +create_x2_region()
> +{
> +	# Find an x2 decoder
> +	decoder="$($CXL list -b "$CXL_TEST_BUS" -D -d root | jq -r ".[] |
> +		select(.pmem_capable == true) |
> +		select(.nr_targets == 2) |
> +		.decoder")"
> +
> +	# Find a memdev for each host-bridge interleave position
> +	port_dev0="$($CXL list -T -d "$decoder" | jq -r ".[] |
> +		.targets | .[] | select(.position == 0) | .target")"
> +	port_dev1="$($CXL list -T -d "$decoder" | jq -r ".[] |
> +		.targets | .[] | select(.position == 1) | .target")"
> +	mem0="$($CXL list -M -p "$port_dev0" | jq -r ".[0].memdev")"
> +	mem1="$($CXL list -M -p "$port_dev1" | jq -r ".[0].memdev")"
> +
> +	region="$($CXL create-region -d "$decoder" -m "$mem0" "$mem1" |
> +		jq -r ".region")"
> +	if [[ ! $region ]]; then
> +		echo "create-region failed for $decoder"
> +		err "$LINENO"
> +	fi
> +	echo "$region"
> +}
> +
> +# When cxl-cli support for inject and clear arrives, replace
> +# the writes to /sys/kernel/debug with the new cxl commands.
> +
> +inject_poison_sysfs()
> +{
> +	memdev="$1"
> +	addr="$2"
> +
> +	echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/inject_poison
> +}
> +
> +clear_poison_sysfs()
> +{
> +	memdev="$1"
> +	addr="$2"
> +
> +	echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/clear_poison
> +}
> +
> +validate_poison_found()
> +{
> +	list_by="$1"
> +	nr_expect="$2"
> +
> +	poison_list="$($CXL list "$list_by" --media-errors |
> +		jq -r '.[].media_errors')"
> +	if [[ ! $poison_list ]]; then
> +		nr_found=0
> +	else
> +		nr_found=$(jq "length" <<< "$poison_list")
> +	fi
> +	if [ "$nr_found" -ne "$nr_expect" ]; then
> +		echo "$nr_expect poison records expected, $nr_found found"
> +		err "$LINENO"
> +	fi
> +}
> +
> +test_poison_by_memdev()
> +{
> +	find_memdev
> +	inject_poison_sysfs "$memdev" "0x40000000"
> +	inject_poison_sysfs "$memdev" "0x40001000"
> +	inject_poison_sysfs "$memdev" "0x600"
> +	inject_poison_sysfs "$memdev" "0x0"
> +	validate_poison_found "-m $memdev" 4
> +
> +	clear_poison_sysfs "$memdev" "0x40000000"
> +	clear_poison_sysfs "$memdev" "0x40001000"
> +	clear_poison_sysfs "$memdev" "0x600"
> +	clear_poison_sysfs "$memdev" "0x0"
> +	validate_poison_found "-m $memdev" 0
> +}
> +
> +test_poison_by_region()
> +{
> +	create_x2_region
> +	inject_poison_sysfs "$mem0" "0x40000000"
> +	inject_poison_sysfs "$mem1" "0x40000000"
> +	validate_poison_found "-r $region" 2
> +
> +	clear_poison_sysfs "$mem0" "0x40000000"
> +	clear_poison_sysfs "$mem1" "0x40000000"
> +	validate_poison_found "-r $region" 0
> +}
> +
> +# Turn tracing on. Note that 'cxl list --poison' does toggle the tracing.
> +# Turning it on here allows the test user to also view inject and clear
> +# trace events.
> +echo 1 > /sys/kernel/tracing/events/cxl/cxl_poison/enable
> +
> +test_poison_by_memdev
> +test_poison_by_region
> +
> +check_dmesg "$LINENO"
> +
> +modprobe -r cxl-test
> diff --git a/test/meson.build b/test/meson.build
> index a965a79fd6cb..d871e28e17ce 100644
> --- a/test/meson.build
> +++ b/test/meson.build
> @@ -160,6 +160,7 @@ cxl_events = find_program('cxl-events.sh')
>  cxl_sanitize = find_program('cxl-sanitize.sh')
>  cxl_destroy_region = find_program('cxl-destroy-region.sh')
>  cxl_qos_class = find_program('cxl-qos-class.sh')
> +cxl_poison = find_program('cxl-poison.sh')
>  
>  tests = [
>    [ 'libndctl',               libndctl,		  'ndctl' ],
> @@ -192,6 +193,7 @@ tests = [
>    [ 'cxl-sanitize.sh',        cxl_sanitize,       'cxl'   ],
>    [ 'cxl-destroy-region.sh',  cxl_destroy_region, 'cxl'   ],
>    [ 'cxl-qos-class.sh',       cxl_qos_class,      'cxl'   ],
> +  [ 'cxl-poison.sh',          cxl_poison,         'cxl'   ],
>  ]
>  
>  if get_option('destructive').enabled()
Wonjae Lee March 15, 2024, 11:03 p.m. UTC | #2
alison.schofield@intel.com wrote:
> From: Alison Schofield <alison.schofield@intel.com>
>
> Exercise cxl list, libcxl, and driver pieces of the get poison list
> pathway. Inject and clear poison using debugfs and use cxl-cli to
> read the poison list by memdev and by region.
>
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> ---
>  test/cxl-poison.sh | 137 +++++++++++++++++++++++++++++++++++++++++++++
>  test/meson.build   |   2 +
>  2 files changed, 139 insertions(+)
>  create mode 100644 test/cxl-poison.sh
>
> diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh
> new file mode 100644
> index 000000000000..af2e9dcd1a11
> --- /dev/null
> +++ b/test/cxl-poison.sh

[snip]

> +# Turn tracing on. Note that 'cxl list --poison' does toggle the tracing.

Hi,

I know it's trivial and not sure if I'm understanding the history of
the patch series correctly, but --poison seems to be an option that
was suggested before --media-errors. I'm wondering if it's okay to
leave this comment.

Thanks,
Wonjae
Alison Schofield March 18, 2024, 5:17 p.m. UTC | #3
On Sat, Mar 16, 2024 at 08:03:34AM +0900, Wonjae Lee wrote:
> alison.schofield@intel.com wrote:
> > From: Alison Schofield <alison.schofield@intel.com>
> >
> > Exercise cxl list, libcxl, and driver pieces of the get poison list
> > pathway. Inject and clear poison using debugfs and use cxl-cli to
> > read the poison list by memdev and by region.
> >
> > Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> > ---
> >  test/cxl-poison.sh | 137 +++++++++++++++++++++++++++++++++++++++++++++
> >  test/meson.build   |   2 +
> >  2 files changed, 139 insertions(+)
> >  create mode 100644 test/cxl-poison.sh
> >
> > diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh
> > new file mode 100644
> > index 000000000000..af2e9dcd1a11
> > --- /dev/null
> > +++ b/test/cxl-poison.sh
> 
> [snip]
> 
> > +# Turn tracing on. Note that 'cxl list --poison' does toggle the tracing.
> 
> Hi,
> 
> I know it's trivial and not sure if I'm understanding the history of
> the patch series correctly, but --poison seems to be an option that
> was suggested before --media-errors. I'm wondering if it's okay to
> leave this comment.

Thanks Wonjae - I appreciate your find. I'll fix it up.
Alison

> 
> Thanks,
> Wonjae
diff mbox series

Patch

diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh
new file mode 100644
index 000000000000..af2e9dcd1a11
--- /dev/null
+++ b/test/cxl-poison.sh
@@ -0,0 +1,137 @@ 
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2023 Intel Corporation. All rights reserved.
+
+. "$(dirname "$0")"/common  # source the 'common' file that sits next to this script
+
+rc=77  # NOTE(review): presumably the "skipped" exit status consumed by err in common - confirm
+
+set -ex  # exit on the first failing command and trace every command
+
+trap 'err $LINENO' ERR  # any failing command calls err with its line number
+
+check_prereq "jq"  # jq is used below to parse the JSON printed by cxl list
+
+modprobe -r cxl_test  # unload first so the module is freshly loaded below
+modprobe cxl_test
+
+rc=1  # NOTE(review): presumably switches failures from "skipped" to "failed" - confirm
+
+# THEORY OF OPERATION: Exercise cxl-cli and cxl driver ability to
+# inject, clear, and get the poison list. Do it by memdev and by region.
+
+find_memdev()
+{
+	# Set memdev to the first test-bus memdev with pmem_size and ram_size
+	readarray -t capable_mems < <(
+		"$CXL" list -b "$CXL_TEST_BUS" -M | jq -r '.[] |
+			select(.pmem_size != null and .ram_size != null) | .memdev'
+	)
+	if (( ${#capable_mems[@]} == 0 )); then
+		echo "no memdevs found for test"
+		err "$LINENO"
+	fi
+	memdev=${capable_mems[0]}
+}
+
+create_x2_region()
+{
+	# Sets decoder, mem0, mem1 and region for a new x2 region; echoes region
+	decoder="$("$CXL" list -b "$CXL_TEST_BUS" -D -d root | jq -r ".[] |
+		select(.pmem_capable == true) |
+		select(.nr_targets == 2) | .decoder")"
+
+	# Find a memdev for each host-bridge interleave position
+	port_dev0="$("$CXL" list -T -d "$decoder" | jq -r ".[] |
+		.targets | .[] | select(.position == 0) | .target")"
+	port_dev1="$("$CXL" list -T -d "$decoder" | jq -r ".[] |
+		.targets | .[] | select(.position == 1) | .target")"
+	mem0="$("$CXL" list -M -p "$port_dev0" | jq -r ".[0].memdev")"
+	mem1="$("$CXL" list -M -p "$port_dev1" | jq -r ".[0].memdev")"
+
+	# "// empty": jq -r prints "null" for a missing key, defeating the check
+	region="$("$CXL" create-region -d "$decoder" -m "$mem0" "$mem1" |
+		jq -r ".region // empty")"
+	if [[ ! $region ]]; then
+		echo "create-region failed for $decoder"
+		err "$LINENO"
+	fi
+	echo "$region"
+}
+
+# When cxl-cli support for inject and clear arrives, replace
+# the writes to /sys/kernel/debug with the new cxl commands.
+
+inject_poison_sysfs()
+{
+	# $1: memdev, $2: address; locals keep the caller's $memdev intact
+	local memdev="$1"
+	local addr="$2"
+	echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/inject_poison
+}
+
+clear_poison_sysfs()
+{
+	# $1: memdev, $2: address; locals keep the caller's $memdev intact
+	local memdev="$1"
+	local addr="$2"
+	echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/clear_poison
+}
+
+validate_poison_found()
+{
+	# $1: cxl list selector (e.g. "-m <memdev>"), $2: expected record count
+	list_by="$1"; nr_expect="$2"
+	nr_found=0
+	poison_list="$($CXL list "$list_by" --media-errors |
+		jq -r '.[].media_errors')"
+	# Empty output counts as zero; otherwise take jq's length of the value
+	if [[ -n $poison_list ]]; then
+		nr_found=$(jq "length" <<< "$poison_list")
+	fi
+
+	if [ "$nr_found" -ne "$nr_expect" ]; then
+		echo "$nr_expect poison records expected, $nr_found found"
+		err "$LINENO"
+	fi
+}
+
+test_poison_by_memdev()
+{
+	# Inject four records on one memdev, list them, clear them, list none
+	local a addrs=(0x40000000 0x40001000 0x600 0x0)
+
+	find_memdev
+	for a in "${addrs[@]}"; do
+		inject_poison_sysfs "$memdev" "$a"
+	done
+	validate_poison_found "-m $memdev" 4
+	for a in "${addrs[@]}"; do
+		clear_poison_sysfs "$memdev" "$a"
+	done
+	validate_poison_found "-m $memdev" 0
+}
+
+test_poison_by_region()
+{
+	# One record per region memdev: expect 2 by region, then 0 once cleared
+	local m
+
+	create_x2_region
+	for m in "$mem0" "$mem1"; do inject_poison_sysfs "$m" "0x40000000"; done
+	validate_poison_found "-r $region" 2
+	for m in "$mem0" "$mem1"; do clear_poison_sysfs "$m" "0x40000000"; done
+	validate_poison_found "-r $region" 0
+}
+
+# Turn tracing on. Note that 'cxl list --media-errors' toggles the tracing
+# itself. Turning it on here allows the test user to also view inject and
+# clear trace events.
+echo 1 > /sys/kernel/tracing/events/cxl/cxl_poison/enable
+
+test_poison_by_memdev
+test_poison_by_region
+
+check_dmesg "$LINENO"
+
+modprobe -r cxl_test
diff --git a/test/meson.build b/test/meson.build
index a965a79fd6cb..d871e28e17ce 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -160,6 +160,7 @@  cxl_events = find_program('cxl-events.sh')
 cxl_sanitize = find_program('cxl-sanitize.sh')
 cxl_destroy_region = find_program('cxl-destroy-region.sh')
 cxl_qos_class = find_program('cxl-qos-class.sh')
+cxl_poison = find_program('cxl-poison.sh')
 
 tests = [
   [ 'libndctl',               libndctl,		  'ndctl' ],
@@ -192,6 +193,7 @@  tests = [
   [ 'cxl-sanitize.sh',        cxl_sanitize,       'cxl'   ],
   [ 'cxl-destroy-region.sh',  cxl_destroy_region, 'cxl'   ],
   [ 'cxl-qos-class.sh',       cxl_qos_class,      'cxl'   ],
+  [ 'cxl-poison.sh',          cxl_poison,         'cxl'   ],
 ]
 
 if get_option('destructive').enabled()