diff mbox series

[ndctl,v3,5/5] cxl/test: add cxl-poison.sh unit test

Message ID 2c7aa46e399738867b21bb35120196310ed2613d.1700258145.git.alison.schofield@intel.com (mailing list archive)
State Superseded
Headers show
Series Support poison list retrieval | expand

Commit Message

Alison Schofield Nov. 17, 2023, 10:35 p.m. UTC
From: Alison Schofield <alison.schofield@intel.com>

Exercise cxl list, libcxl, and driver pieces of the get poison list
pathway. Inject and clear poison using debugfs and use cxl-cli to
read the poison list by memdev and by region.

Signed-off-by: Alison Schofield <alison.schofield@intel.com>
---
 test/cxl-poison.sh | 135 +++++++++++++++++++++++++++++++++++++++++++++
 test/meson.build   |   2 +
 2 files changed, 137 insertions(+)
 create mode 100644 test/cxl-poison.sh

Comments

Vishal Verma Nov. 17, 2023, 11:20 p.m. UTC | #1
On Fri, 2023-11-17 at 14:35 -0800, alison.schofield@intel.com wrote:
> From: Alison Schofield <alison.schofield@intel.com>
> 
[..]

Rest of the series is looking good, just a few minor things below.

> 
> +
> +find_media_errors()
> +{
> +       local json="$1"
> +
> +       nr="$(jq -r ".nr_records" <<< "$json")"
> +       if [[ $nr != $NR_ERRS ]]; then

Minor shellcheck complaint, the right hand side of a [[ ]] check should
be quoted, so [[ $nr != "$NR_ERRS" ]]

> +               echo "$mem: $NR_ERRS poison records expected, $nr found"

$mem is never set, maybe it needs to be extracted from the json above?

> +               err "$LINENO"
> +       fi
> +}
> +
> +# Turn tracing on. Note that 'cxl list --poison' does toggle the tracing.
> +# Turning it on here allows the test user to also view inject and clear
> +# trace events.
> +echo 1 > /sys/kernel/tracing/events/cxl/cxl_poison/enable
> +
> +# Poison by memdev
> +# Inject then clear into cxl_test known pmem and ram partitions
> +find_memdev
> +inject_poison_sysfs "$memdev" "0x40000000"
> +inject_poison_sysfs "$memdev" "0x40001000"
> +inject_poison_sysfs "$memdev" "0x600"
> +inject_poison_sysfs "$memdev" "0x0"
> +NR_ERRS=4
> +json=$("$CXL" list -m "$memdev" --poison | jq -r '.[].poison')
> +find_media_errors "$json"

Instead of setting NR_ERRS 'globally', just pass it to the
find_media_errors function as well alongside $json, and maybe rename it
to validate_nr_records() or something. More generally, no need to
capitalize something like NR_ERRS - all caps is usually only for
variables coming from the env.

> +clear_poison_sysfs "$memdev" "0x40000000"
> +clear_poison_sysfs "$memdev" "0x40001000"
> +clear_poison_sysfs "$memdev" "0x600"
> +clear_poison_sysfs "$memdev" "0x0"
> +NR_ERRS=0
> +json=$("$CXL" list -m "$memdev" --poison | jq -r '.[].poison')

Fairly minor, but shellcheck complains that all the $() command
substitutions should be quoted, i.e. "$(...)".

> +find_media_errors "$json"
> +
> +# Poison by region
> +# Inject then clear into cxl_test known pmem dpa mappings
> +create_region
> +inject_poison_sysfs "$mem0" "0x40000000"
> +inject_poison_sysfs "$mem1" "0x40000000"
> +NR_ERRS=2
> +json=$("$CXL" list -r "$region" --poison | jq -r '.[].poison')
> +find_media_errors "$json"
> +clear_poison_sysfs "$mem0" "0x40000000"
> +clear_poison_sysfs "$mem1" "0x40000000"
> +NR_ERRS=0
> +json=$("$CXL" list -r "$region" --poison | jq -r '.[].poison')
> +find_media_errors "$json"
> +
> +check_dmesg "$LINENO"
> +
> +modprobe -r cxl-test
> diff --git a/test/meson.build b/test/meson.build
> index 224adaf41fcc..2706fa5d633c 100644
> --- a/test/meson.build
> +++ b/test/meson.build
> @@ -157,6 +157,7 @@ cxl_create_region = find_program('cxl-create-region.sh')
>  cxl_xor_region = find_program('cxl-xor-region.sh')
>  cxl_update_firmware = find_program('cxl-update-firmware.sh')
>  cxl_events = find_program('cxl-events.sh')
> +cxl_poison = find_program('cxl-poison.sh')
>  
>  tests = [
>    [ 'libndctl',               libndctl,                  'ndctl' ],
> @@ -186,6 +187,7 @@ tests = [
>    [ 'cxl-create-region.sh',   cxl_create_region,  'cxl'   ],
>    [ 'cxl-xor-region.sh',      cxl_xor_region,     'cxl'   ],
>    [ 'cxl-events.sh',          cxl_events,         'cxl'   ],
> +  [ 'cxl-poison.sh',          cxl_poison,         'cxl'   ],
>  ]
>  
>  if get_option('destructive').enabled()
diff mbox series

Patch

diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh
new file mode 100644
index 000000000000..a562153c8324
--- /dev/null
+++ b/test/cxl-poison.sh
@@ -0,0 +1,135 @@ 
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2022 Intel Corporation. All rights reserved.
+
+# Pull in the shared test helpers. The path is quoted so that a checkout
+# directory containing spaces does not get word-split.
+# NOTE(review): err, check_prereq, check_dmesg, $CXL and $CXL_TEST_BUS are
+# not defined in this script; presumably they all come from 'common'.
+. "$(dirname "$0")"/common
+
+# NOTE(review): rc is presumably the exit status that err reports. 77 is
+# the "skipped" status for meson tests, so a failure before cxl_test is
+# loaded would count as a skip rather than a failure - confirm in 'common'.
+rc=77
+
+set -ex
+
+trap 'err $LINENO' ERR
+
+check_prereq "jq"
+
+# Unload and reload cxl_test so the run starts from a freshly loaded module.
+modprobe -r cxl_test
+modprobe cxl_test
+
+# Presumably from here on any error is reported as a real test failure.
+rc=1
+
+# THEORY OF OPERATION: Exercise cxl-cli and cxl driver ability to
+# inject, clear, and get the poison list. Do it by memdev and by region.
+# Based on current cxl-test topology.
+
+# find_memdev - pick a memdev that reports both a pmem and a ram size.
+#
+# Lists the memdevs on $CXL_TEST_BUS and keeps those whose pmem_size and
+# ram_size are both non-null.
+# Globals:   CXL, CXL_TEST_BUS (read); memdev (written with the first match)
+# Arguments: none
+# Failure:   prints a message and calls err when nothing matches
+find_memdev()
+{
+	# Scratch list only; keep it out of the global namespace.
+	local -a capable_mems
+
+	readarray -t capable_mems < <("$CXL" list -b "$CXL_TEST_BUS" -M |
+		jq -r ".[] | select(.pmem_size != null) |
+	       	select(.ram_size != null) | .memdev")
+
+	if [[ ${#capable_mems[@]} -eq 0 ]]; then
+		echo "no memdevs found for test"
+		err "$LINENO"
+	fi
+
+	memdev=${capable_mems[0]}
+}
+
+# setup_x2_region - choose the decoder and memdevs for a 2-way region.
+#
+# Globals:   CXL, CXL_TEST_BUS (read);
+#            decoder, mem0, mem1, memdevs (written; create_region and the
+#            poison-by-region checks read them afterwards)
+# Arguments: none
+setup_x2_region()
+{
+        # Intermediate lookups only; nothing outside needs them.
+        local port_dev0 port_dev1
+
+        # Find an x2 decoder
+        decoder=$("$CXL" list -b "$CXL_TEST_BUS" -D -d root | jq -r ".[] |
+          select(.pmem_capable == true) |
+          select(.nr_targets == 2) |
+          .decoder")
+
+        # Find a memdev for each host-bridge interleave position
+        port_dev0=$("$CXL" list -T -d "$decoder" | jq -r ".[] |
+            .targets | .[] | select(.position == 0) | .target")
+        port_dev1=$("$CXL" list -T -d "$decoder" | jq -r ".[] |
+            .targets | .[] | select(.position == 1) | .target")
+        mem0=$("$CXL" list -M -p "$port_dev0" | jq -r ".[0].memdev")
+        mem1=$("$CXL" list -M -p "$port_dev1" | jq -r ".[0].memdev")
+        memdevs="$mem0 $mem1"
+}
+
+# create_region - create a region on the decoder picked by setup_x2_region.
+#
+# Globals:   CXL (read); decoder, memdevs (set by setup_x2_region);
+#            region (written with the name reported by create-region)
+# Arguments: none
+# Failure:   prints a message and calls err when no region name came back
+create_region()
+{
+	setup_x2_region
+	# $memdevs stays unquoted on purpose: it holds two space-separated
+	# memdev names that must reach create-region as separate arguments.
+	# shellcheck disable=SC2086
+	region=$("$CXL" create-region -d "$decoder" -m $memdevs | jq -r ".region")
+	# jq -r prints the literal string "null" when the key is missing, so
+	# treat that the same as an empty result.
+	if [[ -z $region || $region == "null" ]]; then
+		echo "create-region failed for $decoder"
+		err "$LINENO"
+	fi
+}
+
+# When cxl-cli support for inject and clear arrives, replace
+# the writes to /sys/kernel/debug with the new cxl commands.
+
+# inject_poison_sysfs - inject poison at one address of one memdev.
+#
+# Despite the name, the write goes to the memdev's inject_poison file
+# under /sys/kernel/debug/cxl.
+# Arguments: $1 - memdev name
+#            $2 - address to poison, written to the file as given
+inject_poison_sysfs()
+{
+	# Locals, so that a call for another device (e.g. "$mem0") does not
+	# overwrite the script-level $memdev chosen by find_memdev.
+	local memdev="$1"
+	local addr="$2"
+
+	echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/inject_poison
+}
+
+# clear_poison_sysfs - clear poison at one address of one memdev.
+#
+# Despite the name, the write goes to the memdev's clear_poison file
+# under /sys/kernel/debug/cxl.
+# Arguments: $1 - memdev name
+#            $2 - address to clear, written to the file as given
+clear_poison_sysfs()
+{
+	# Locals, so that a call for another device (e.g. "$mem0") does not
+	# overwrite the script-level $memdev chosen by find_memdev.
+	local memdev="$1"
+	local addr="$2"
+
+	echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/clear_poison
+}
+
+# find_media_errors - check the record count of a poison list.
+#
+# Arguments: $1 - poison JSON object carrying an "nr_records" key
+#            $2 - expected record count (optional; falls back to the
+#                 global $NR_ERRS so existing callers keep working)
+# Failure:   prints expected vs. found and calls err on a mismatch
+find_media_errors()
+{
+	local json="$1"
+	local expected="${2:-$NR_ERRS}"
+	local nr
+
+	nr="$(jq -r ".nr_records" <<< "$json")"
+	# Quote the right-hand side so it is compared as a literal string
+	# instead of being treated as a glob pattern.
+	if [[ $nr != "$expected" ]]; then
+		# No device prefix: this function only receives the poison
+		# JSON, not the name of the memdev or region it came from.
+		echo "$expected poison records expected, $nr found"
+		err "$LINENO"
+	fi
+}
+
+# Enable the cxl_poison trace event up front. 'cxl list --poison' toggles
+# this tracing itself; switching it on here additionally lets the test
+# user view the inject and clear trace events.
+echo 1 > /sys/kernel/tracing/events/cxl/cxl_poison/enable
+
+# Poison by memdev
+# Inject, list, clear, then list again, using addresses in the cxl_test
+# known pmem and ram partitions.
+find_memdev
+memdev_addrs=("0x40000000" "0x40001000" "0x600" "0x0")
+for dpa in "${memdev_addrs[@]}"; do
+	inject_poison_sysfs "$memdev" "$dpa"
+done
+NR_ERRS=4
+json="$("$CXL" list -m "$memdev" --poison | jq -r '.[].poison')"
+find_media_errors "$json"
+for dpa in "${memdev_addrs[@]}"; do
+	clear_poison_sysfs "$memdev" "$dpa"
+done
+NR_ERRS=0
+json="$("$CXL" list -m "$memdev" --poison | jq -r '.[].poison')"
+find_media_errors "$json"
+
+# Poison by region
+# Same sequence, using one cxl_test known pmem dpa mapping on each of the
+# two memdevs the region was created from.
+create_region
+for region_mem in "$mem0" "$mem1"; do
+	inject_poison_sysfs "$region_mem" "0x40000000"
+done
+NR_ERRS=2
+json="$("$CXL" list -r "$region" --poison | jq -r '.[].poison')"
+find_media_errors "$json"
+for region_mem in "$mem0" "$mem1"; do
+	clear_poison_sysfs "$region_mem" "0x40000000"
+done
+NR_ERRS=0
+json="$("$CXL" list -r "$region" --poison | jq -r '.[].poison')"
+find_media_errors "$json"
+
+check_dmesg "$LINENO"
+
+modprobe -r cxl-test
diff --git a/test/meson.build b/test/meson.build
index 224adaf41fcc..2706fa5d633c 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -157,6 +157,7 @@  cxl_create_region = find_program('cxl-create-region.sh')
 cxl_xor_region = find_program('cxl-xor-region.sh')
 cxl_update_firmware = find_program('cxl-update-firmware.sh')
 cxl_events = find_program('cxl-events.sh')
+cxl_poison = find_program('cxl-poison.sh')
 
 tests = [
   [ 'libndctl',               libndctl,		  'ndctl' ],
@@ -186,6 +187,7 @@  tests = [
   [ 'cxl-create-region.sh',   cxl_create_region,  'cxl'   ],
   [ 'cxl-xor-region.sh',      cxl_xor_region,     'cxl'   ],
   [ 'cxl-events.sh',          cxl_events,         'cxl'   ],
+  [ 'cxl-poison.sh',          cxl_poison,         'cxl'   ],
 ]
 
 if get_option('destructive').enabled()