diff mbox series

[ndctl,v7,7/7] cxl/test: add cxl-poison.sh unit test

Message ID 855025e88e0c261ae36dd6bd70443ebd9e7e5e6f.1707351560.git.alison.schofield@intel.com (mailing list archive)
State Superseded
Headers show
Series Support poison list retrieval | expand

Commit Message

Alison Schofield Feb. 8, 2024, 1:01 a.m. UTC
From: Alison Schofield <alison.schofield@intel.com>

Exercise cxl list, libcxl, and driver pieces of the get poison list
pathway. Inject and clear poison using debugfs and use cxl-cli to
read the poison list by memdev and by region.

Signed-off-by: Alison Schofield <alison.schofield@intel.com>
---
 test/cxl-poison.sh | 137 +++++++++++++++++++++++++++++++++++++++++++++
 test/meson.build   |   2 +
 2 files changed, 139 insertions(+)
 create mode 100644 test/cxl-poison.sh

Comments

Dave Jiang Feb. 8, 2024, 6:18 p.m. UTC | #1
On 2/7/24 6:01 PM, alison.schofield@intel.com wrote:
> From: Alison Schofield <alison.schofield@intel.com>
> 
> Exercise cxl list, libcxl, and driver pieces of the get poison list
> pathway. Inject and clear poison using debugfs and use cxl-cli to
> read the poison list by memdev and by region.
> 
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>

Reviewed-by: Dave Jiang <dave.jiang@intel.com>

Should circle back and use CXL CLI enabled poison injection/clear when that's available. 
> ---
>  test/cxl-poison.sh | 137 +++++++++++++++++++++++++++++++++++++++++++++
>  test/meson.build   |   2 +
>  2 files changed, 139 insertions(+)
>  create mode 100644 test/cxl-poison.sh
> 
> diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh
> new file mode 100644
> index 000000000000..6fceb0f2c360
> --- /dev/null
> +++ b/test/cxl-poison.sh
> @@ -0,0 +1,137 @@
> +#!/bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (C) 2023 Intel Corporation. All rights reserved.
> +
> +. "$(dirname "$0")"/common
> +
> +rc=77
> +
> +set -ex
> +
> +trap 'err $LINENO' ERR
> +
> +check_prereq "jq"
> +
> +modprobe -r cxl_test
> +modprobe cxl_test
> +
> +rc=1
> +
> +# THEORY OF OPERATION: Exercise cxl-cli and cxl driver ability to
> +# inject, clear, and get the poison list. Do it by memdev and by region.
> +
> +find_memdev()
> +{
> +	readarray -t capable_mems < <("$CXL" list -b "$CXL_TEST_BUS" -M |
> +		jq -r ".[] | select(.pmem_size != null) |
> +		select(.ram_size != null) | .memdev")
> +
> +	if [ ${#capable_mems[@]} == 0 ]; then
> +		echo "no memdevs found for test"
> +		err "$LINENO"
> +	fi
> +
> +	memdev=${capable_mems[0]}
> +}
> +
> +create_x2_region()
> +{
> +        # Find an x2 decoder
> +        decoder="$($CXL list -b "$CXL_TEST_BUS" -D -d root | jq -r ".[] |
> +		select(.pmem_capable == true) |
> +		select(.nr_targets == 2) |
> +		.decoder")"
> +
> +        # Find a memdev for each host-bridge interleave position
> +        port_dev0="$($CXL list -T -d "$decoder" | jq -r ".[] |
> +		.targets | .[] | select(.position == 0) | .target")"
> +        port_dev1="$($CXL list -T -d "$decoder" | jq -r ".[] |
> +		.targets | .[] | select(.position == 1) | .target")"
> +        mem0="$($CXL list -M -p "$port_dev0" | jq -r ".[0].memdev")"
> +        mem1="$($CXL list -M -p "$port_dev1" | jq -r ".[0].memdev")"
> +
> +	region="$($CXL create-region -d "$decoder" -m "$mem0" "$mem1" |
> +		 jq -r ".region")"
> +	if [[ ! $region ]]; then
> +		echo "create-region failed for $decoder"
> +		err "$LINENO"
> +	fi
> +	echo "$region"
> +}
> +
> +# When cxl-cli support for inject and clear arrives, replace
> +# the writes to /sys/kernel/debug with the new cxl commands.
> +
> +inject_poison_sysfs()
> +{
> +	memdev="$1"
> +	addr="$2"
> +
> +	echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/inject_poison
> +}
> +
> +clear_poison_sysfs()
> +{
> +	memdev="$1"
> +	addr="$2"
> +
> +	echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/clear_poison
> +}
> +
> +validate_poison_found()
> +{
> +	list_by="$1"
> +	nr_expect="$2"
> +
> +	poison_list="$($CXL list "$list_by" --media-errors |
> +		jq -r '.[].media_errors')"
> +	if [[ ! $poison_list ]]; then
> +		nr_found=0
> +	else
> +		nr_found=$(jq "length" <<< "$poison_list")
> +	fi
> +	if [ "$nr_found" -ne "$nr_expect" ]; then
> +		echo "$nr_expect poison records expected, $nr_found found"
> +		err "$LINENO"
> +	fi
> +}
> +
> +test_poison_by_memdev()
> +{
> +	find_memdev
> +	inject_poison_sysfs "$memdev" "0x40000000"
> +	inject_poison_sysfs "$memdev" "0x40001000"
> +	inject_poison_sysfs "$memdev" "0x600"
> +	inject_poison_sysfs "$memdev" "0x0"
> +	validate_poison_found "-m $memdev" 4
> +
> +	clear_poison_sysfs "$memdev" "0x40000000"
> +	clear_poison_sysfs "$memdev" "0x40001000"
> +	clear_poison_sysfs "$memdev" "0x600"
> +	clear_poison_sysfs "$memdev" "0x0"
> +	validate_poison_found "-m $memdev" 0
> +}
> +
> +test_poison_by_region()
> +{
> +	create_x2_region
> +	inject_poison_sysfs "$mem0" "0x40000000"
> +	inject_poison_sysfs "$mem1" "0x40000000"
> +	validate_poison_found "-r $region" 2
> +
> +	clear_poison_sysfs "$mem0" "0x40000000"
> +	clear_poison_sysfs "$mem1" "0x40000000"
> +	validate_poison_found "-r $region" 0
> +}
> +
> +# Turn tracing on. Note that 'cxl list --poison' does toggle the tracing.
> +# Turning it on here allows the test user to also view inject and clear
> +# trace events.
> +echo 1 > /sys/kernel/tracing/events/cxl/cxl_poison/enable
> +
> +test_poison_by_memdev
> +test_poison_by_region
> +
> +check_dmesg "$LINENO"
> +
> +modprobe -r cxl-test
> diff --git a/test/meson.build b/test/meson.build
> index 224adaf41fcc..2706fa5d633c 100644
> --- a/test/meson.build
> +++ b/test/meson.build
> @@ -157,6 +157,7 @@ cxl_create_region = find_program('cxl-create-region.sh')
>  cxl_xor_region = find_program('cxl-xor-region.sh')
>  cxl_update_firmware = find_program('cxl-update-firmware.sh')
>  cxl_events = find_program('cxl-events.sh')
> +cxl_poison = find_program('cxl-poison.sh')
>  
>  tests = [
>    [ 'libndctl',               libndctl,		  'ndctl' ],
> @@ -186,6 +187,7 @@ tests = [
>    [ 'cxl-create-region.sh',   cxl_create_region,  'cxl'   ],
>    [ 'cxl-xor-region.sh',      cxl_xor_region,     'cxl'   ],
>    [ 'cxl-events.sh',          cxl_events,         'cxl'   ],
> +  [ 'cxl-poison.sh',          cxl_poison,         'cxl'   ],
>  ]
>  
>  if get_option('destructive').enabled()
Verma, Vishal L Feb. 22, 2024, 8:04 a.m. UTC | #2
On Wed, 2024-02-07 at 17:01 -0800, alison.schofield@intel.com wrote:
> From: Alison Schofield <alison.schofield@intel.com>
> 
> Exercise cxl list, libcxl, and driver pieces of the get poison list
> pathway. Inject and clear poison using debugfs and use cxl-cli to
> read the poison list by memdev and by region.
> 
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> ---
>  test/cxl-poison.sh | 137 +++++++++++++++++++++++++++++++++++++++++++++
>  test/meson.build   |   2 +
>  2 files changed, 139 insertions(+)
>  create mode 100644 test/cxl-poison.sh
> 
> diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh
> new file mode 100644
> index 000000000000..6fceb0f2c360
> --- /dev/null
> +++ b/test/cxl-poison.sh
> @@ -0,0 +1,137 @@
> +#!/bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (C) 2023 Intel Corporation. All rights reserved.
> +
> +. "$(dirname "$0")"/common
> +
> +rc=77
> +
> +set -ex
> +
> +trap 'err $LINENO' ERR
> +
> +check_prereq "jq"
> +
> +modprobe -r cxl_test
> +modprobe cxl_test
> +
> +rc=1
> +
> +# THEORY OF OPERATION: Exercise cxl-cli and cxl driver ability to
> +# inject, clear, and get the poison list. Do it by memdev and by region.
> +
> +find_memdev()
> +{
> +	readarray -t capable_mems < <("$CXL" list -b "$CXL_TEST_BUS" -M |
> +		jq -r ".[] | select(.pmem_size != null) |
> +		select(.ram_size != null) | .memdev")
> +
> +	if [ ${#capable_mems[@]} == 0 ]; then
> +		echo "no memdevs found for test"
> +		err "$LINENO"
> +	fi
> +
> +	memdev=${capable_mems[0]}
> +}
> +
> +create_x2_region()
> +{
> +        # Find an x2 decoder
> +        decoder="$($CXL list -b "$CXL_TEST_BUS" -D -d root | jq -r ".[] |
> +		select(.pmem_capable == true) |
> +		select(.nr_targets == 2) |
> +		.decoder")"
> +
> +        # Find a memdev for each host-bridge interleave position
> +        port_dev0="$($CXL list -T -d "$decoder" | jq -r ".[] |
> +		.targets | .[] | select(.position == 0) | .target")"
> +        port_dev1="$($CXL list -T -d "$decoder" | jq -r ".[] |
> +		.targets | .[] | select(.position == 1) | .target")"
> +        mem0="$($CXL list -M -p "$port_dev0" | jq -r ".[0].memdev")"
> +        mem1="$($CXL list -M -p "$port_dev1" | jq -r ".[0].memdev")"

Space/tab mixing here. I wonder if one of the earlier tests where some
of this boilerplate comes from has spaces instead of tabs, and has been
creeping into new tests. Would be nice to at least clean this patch up,
and I can send a cleanup for older tests once the dust settles with the
currently in-flux patchsets.

> +
> +	region="$($CXL create-region -d "$decoder" -m "$mem0" "$mem1" |
> +		 jq -r ".region")"
> +	if [[ ! $region ]]; then
> +		echo "create-region failed for $decoder"
> +		err "$LINENO"
> +	fi
> +	echo "$region"
> +}
> +
> +# When cxl-cli support for inject and clear arrives, replace
> +# the writes to /sys/kernel/debug with the new cxl commands.
> +
> +inject_poison_sysfs()
> +{
> +	memdev="$1"
> +	addr="$2"
> +
> +	echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/inject_poison
> +}
> +
> +clear_poison_sysfs()
> +{
> +	memdev="$1"
> +	addr="$2"
> +
> +	echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/clear_poison
> +}
> +
> +validate_poison_found()
> +{
> +	list_by="$1"
> +	nr_expect="$2"
> +
> +	poison_list="$($CXL list "$list_by" --media-errors |
> +		jq -r '.[].media_errors')"
> +	if [[ ! $poison_list ]]; then
> +		nr_found=0
> +	else
> +		nr_found=$(jq "length" <<< "$poison_list")
> +	fi
> +	if [ "$nr_found" -ne "$nr_expect" ]; then
> +		echo "$nr_expect poison records expected, $nr_found found"
> +		err "$LINENO"
> +	fi
> +}
> +
> +test_poison_by_memdev()
> +{
> +	find_memdev
> +	inject_poison_sysfs "$memdev" "0x40000000"
> +	inject_poison_sysfs "$memdev" "0x40001000"
> +	inject_poison_sysfs "$memdev" "0x600"
> +	inject_poison_sysfs "$memdev" "0x0"
> +	validate_poison_found "-m $memdev" 4
> +
> +	clear_poison_sysfs "$memdev" "0x40000000"
> +	clear_poison_sysfs "$memdev" "0x40001000"
> +	clear_poison_sysfs "$memdev" "0x600"
> +	clear_poison_sysfs "$memdev" "0x0"
> +	validate_poison_found "-m $memdev" 0
> +}
> +
> +test_poison_by_region()
> +{
> +	create_x2_region
> +	inject_poison_sysfs "$mem0" "0x40000000"
> +	inject_poison_sysfs "$mem1" "0x40000000"
> +	validate_poison_found "-r $region" 2
> +
> +	clear_poison_sysfs "$mem0" "0x40000000"
> +	clear_poison_sysfs "$mem1" "0x40000000"
> +	validate_poison_found "-r $region" 0
> +}
> +
> +# Turn tracing on. Note that 'cxl list --poison' does toggle the tracing.
> +# Turning it on here allows the test user to also view inject and clear
> +# trace events.
> +echo 1 > /sys/kernel/tracing/events/cxl/cxl_poison/enable
> +
> +test_poison_by_memdev
> +test_poison_by_region
> +
> +check_dmesg "$LINENO"
> +
> +modprobe -r cxl-test
> diff --git a/test/meson.build b/test/meson.build
> index 224adaf41fcc..2706fa5d633c 100644
> --- a/test/meson.build
> +++ b/test/meson.build
> @@ -157,6 +157,7 @@ cxl_create_region = find_program('cxl-create-region.sh')
>  cxl_xor_region = find_program('cxl-xor-region.sh')
>  cxl_update_firmware = find_program('cxl-update-firmware.sh')
>  cxl_events = find_program('cxl-events.sh')
> +cxl_poison = find_program('cxl-poison.sh')
>  
>  tests = [
>    [ 'libndctl',               libndctl,		  'ndctl' ],
> @@ -186,6 +187,7 @@ tests = [
>    [ 'cxl-create-region.sh',   cxl_create_region,  'cxl'   ],
>    [ 'cxl-xor-region.sh',      cxl_xor_region,     'cxl'   ],
>    [ 'cxl-events.sh',          cxl_events,         'cxl'   ],
> +  [ 'cxl-poison.sh',          cxl_poison,         'cxl'   ],
>  ]
>  
>  if get_option('destructive').enabled()
diff mbox series

Patch

diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh
new file mode 100644
index 000000000000..6fceb0f2c360
--- /dev/null
+++ b/test/cxl-poison.sh
@@ -0,0 +1,137 @@ 
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2023 Intel Corporation. All rights reserved.
+
+. "$(dirname "$0")"/common	# source the shared 'common' file beside this script
+
+rc=77	# NOTE(review): 77 = meson "skipped" status; presumably read by err() - confirm
+
+set -ex	# -e: exit on command failure, -x: trace commands
+
+trap 'err $LINENO' ERR	# on error, call err with the failing line number
+
+check_prereq "jq"	# jq is used below to parse 'cxl list' JSON
+
+modprobe -r cxl_test	# unload, then load, the cxl_test module
+modprobe cxl_test
+
+rc=1	# NOTE(review): presumably makes later err() calls report failure - confirm
+
+# THEORY OF OPERATION: Exercise cxl-cli and cxl driver ability to
+# inject, clear, and get the poison list. Do it by memdev and by region.
+
+find_memdev()
+{
+	local capable_mems	# memdevs with non-null pmem_size and ram_size
+	readarray -t capable_mems < <("$CXL" list -b "$CXL_TEST_BUS" -M |
+		jq -r ".[] | select(.pmem_size != null) |
+		select(.ram_size != null) | .memdev")
+	if [ ${#capable_mems[@]} == 0 ]; then
+		echo "no memdevs found for test"
+		err "$LINENO"
+	fi
+	# The first match is handed to the callers in the global 'memdev'
+	memdev=${capable_mems[0]}
+}
+
+create_x2_region()
+{
+	# Find a pmem capable root decoder with two targets (x2 interleave)
+	decoder="$($CXL list -b "$CXL_TEST_BUS" -D -d root | jq -r ".[] |
+		select(.pmem_capable == true) |
+		select(.nr_targets == 2) |
+		.decoder")"
+
+	# Pick the first memdev below the target at each interleave position
+	port_dev0="$($CXL list -T -d "$decoder" | jq -r ".[] |
+		.targets | .[] | select(.position == 0) | .target")"
+	port_dev1="$($CXL list -T -d "$decoder" | jq -r ".[] |
+		.targets | .[] | select(.position == 1) | .target")"
+	mem0="$($CXL list -M -p "$port_dev0" | jq -r ".[0].memdev")"
+	mem1="$($CXL list -M -p "$port_dev1" | jq -r ".[0].memdev")"
+	# mem0, mem1 and region stay global: test_poison_by_region reads them
+	region="$($CXL create-region -d "$decoder" -m "$mem0" "$mem1" |
+		jq -r ".region")"
+	if [[ ! $region ]]; then
+		echo "create-region failed for $decoder"
+		err "$LINENO"
+	fi
+	echo "$region"
+}
+
+# When cxl-cli support for inject and clear arrives, replace
+# the writes to /sys/kernel/debug with the new cxl commands.
+
+inject_poison_sysfs()
+{
+	local memdev="$1"	# local: do not clobber the caller's global 'memdev'
+	local addr="$2"		# address to inject poison at
+	# Write the address to this memdev's debugfs inject_poison file
+	echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/inject_poison
+}
+
+clear_poison_sysfs()
+{
+	local memdev="$1"	# local: do not clobber the caller's global 'memdev'
+	local addr="$2"		# address to clear poison from
+	# Write the address to this memdev's debugfs clear_poison file
+	echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/clear_poison
+}
+
+validate_poison_found()
+{
+	# $1: list filter ("-m memX" / "-r regionX"), $2: expected record count
+	local list_by="$1" nr_expect="$2"
+	local poison_list nr_found
+	poison_list="$($CXL list "$list_by" --media-errors |
+		jq -r '.[].media_errors')"
+	if [[ ! $poison_list ]]; then
+		nr_found=0	# no output at all counts as zero records
+	else
+		nr_found=$(jq "length" <<< "$poison_list")	# size of media_errors
+	fi
+	if [ "$nr_found" -ne "$nr_expect" ]; then
+		echo "$nr_expect poison records expected, $nr_found found"
+		err "$LINENO"
+	fi
+}
+
+test_poison_by_memdev()
+{
+	# Poison four addresses on one memdev, expect four records, then undo
+	local poison_addrs=("0x40000000" "0x40001000" "0x600" "0x0")
+	local where
+	find_memdev
+	for where in "${poison_addrs[@]}"; do
+		inject_poison_sysfs "$memdev" "$where"
+	done
+	validate_poison_found "-m $memdev" 4
+	for where in "${poison_addrs[@]}"; do
+		clear_poison_sysfs "$memdev" "$where"
+	done
+	validate_poison_found "-m $memdev" 0
+}
+
+test_poison_by_region()
+{
+	# Poison the same address on both region memdevs, list by region, undo
+	local target where="0x40000000"
+
+	create_x2_region
+	for target in "$mem0" "$mem1"; do inject_poison_sysfs "$target" "$where"; done
+	validate_poison_found "-r $region" 2
+	for target in "$mem0" "$mem1"; do clear_poison_sysfs "$target" "$where"; done
+	validate_poison_found "-r $region" 0
+}
+
+# Turn tracing on. Note that 'cxl list --media-errors' does toggle the tracing.
+# Turning it on here allows the test user to also view inject and clear
+# trace events.
+echo 1 > /sys/kernel/tracing/events/cxl/cxl_poison/enable
+
+test_poison_by_memdev
+test_poison_by_region
+
+check_dmesg "$LINENO"	# NOTE(review): presumably scans the kernel log - confirm
+
+modprobe -r cxl_test	# same module-name spelling as the load at the top
diff --git a/test/meson.build b/test/meson.build
index 224adaf41fcc..2706fa5d633c 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -157,6 +157,7 @@  cxl_create_region = find_program('cxl-create-region.sh')
 cxl_xor_region = find_program('cxl-xor-region.sh')
 cxl_update_firmware = find_program('cxl-update-firmware.sh')
 cxl_events = find_program('cxl-events.sh')
+cxl_poison = find_program('cxl-poison.sh')
 
 tests = [
   [ 'libndctl',               libndctl,		  'ndctl' ],
@@ -186,6 +187,7 @@  tests = [
   [ 'cxl-create-region.sh',   cxl_create_region,  'cxl'   ],
   [ 'cxl-xor-region.sh',      cxl_xor_region,     'cxl'   ],
   [ 'cxl-events.sh',          cxl_events,         'cxl'   ],
+  [ 'cxl-poison.sh',          cxl_poison,         'cxl'   ],
 ]
 
 if get_option('destructive').enabled()