| Field | Value |
|---|---|
| Message ID | 24c1f2ec413f92e8e6e8817b3d4d55f5bb142849.1710386468.git.alison.schofield@intel.com (mailing list archive) |
| State | Superseded |
| Headers | show |
| Series | Support poison list retrieval (expand) |
On 3/13/24 9:05 PM, alison.schofield@intel.com wrote: > From: Alison Schofield <alison.schofield@intel.com> > > Exercise cxl list, libcxl, and driver pieces of the get poison list > pathway. Inject and clear poison using debugfs and use cxl-cli to > read the poison list by memdev and by region. > > Signed-off-by: Alison Schofield <alison.schofield@intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> > --- > test/cxl-poison.sh | 137 +++++++++++++++++++++++++++++++++++++++++++++ > test/meson.build | 2 + > 2 files changed, 139 insertions(+) > create mode 100644 test/cxl-poison.sh > > diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh > new file mode 100644 > index 000000000000..af2e9dcd1a11 > --- /dev/null > +++ b/test/cxl-poison.sh > @@ -0,0 +1,137 @@ > +#!/bin/bash > +# SPDX-License-Identifier: GPL-2.0 > +# Copyright (C) 2023 Intel Corporation. All rights reserved. > + > +. "$(dirname "$0")"/common > + > +rc=77 > + > +set -ex > + > +trap 'err $LINENO' ERR > + > +check_prereq "jq" > + > +modprobe -r cxl_test > +modprobe cxl_test > + > +rc=1 > + > +# THEORY OF OPERATION: Exercise cxl-cli and cxl driver ability to > +# inject, clear, and get the poison list. Do it by memdev and by region. 
> + > +find_memdev() > +{ > + readarray -t capable_mems < <("$CXL" list -b "$CXL_TEST_BUS" -M | > + jq -r ".[] | select(.pmem_size != null) | > + select(.ram_size != null) | .memdev") > + > + if [ ${#capable_mems[@]} == 0 ]; then > + echo "no memdevs found for test" > + err "$LINENO" > + fi > + > + memdev=${capable_mems[0]} > +} > + > +create_x2_region() > +{ > + # Find an x2 decoder > + decoder="$($CXL list -b "$CXL_TEST_BUS" -D -d root | jq -r ".[] | > + select(.pmem_capable == true) | > + select(.nr_targets == 2) | > + .decoder")" > + > + # Find a memdev for each host-bridge interleave position > + port_dev0="$($CXL list -T -d "$decoder" | jq -r ".[] | > + .targets | .[] | select(.position == 0) | .target")" > + port_dev1="$($CXL list -T -d "$decoder" | jq -r ".[] | > + .targets | .[] | select(.position == 1) | .target")" > + mem0="$($CXL list -M -p "$port_dev0" | jq -r ".[0].memdev")" > + mem1="$($CXL list -M -p "$port_dev1" | jq -r ".[0].memdev")" > + > + region="$($CXL create-region -d "$decoder" -m "$mem0" "$mem1" | > + jq -r ".region")" > + if [[ ! $region ]]; then > + echo "create-region failed for $decoder" > + err "$LINENO" > + fi > + echo "$region" > +} > + > +# When cxl-cli support for inject and clear arrives, replace > +# the writes to /sys/kernel/debug with the new cxl commands. > + > +inject_poison_sysfs() > +{ > + memdev="$1" > + addr="$2" > + > + echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/inject_poison > +} > + > +clear_poison_sysfs() > +{ > + memdev="$1" > + addr="$2" > + > + echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/clear_poison > +} > + > +validate_poison_found() > +{ > + list_by="$1" > + nr_expect="$2" > + > + poison_list="$($CXL list "$list_by" --media-errors | > + jq -r '.[].media_errors')" > + if [[ ! 
$poison_list ]]; then > + nr_found=0 > + else > + nr_found=$(jq "length" <<< "$poison_list") > + fi > + if [ "$nr_found" -ne "$nr_expect" ]; then > + echo "$nr_expect poison records expected, $nr_found found" > + err "$LINENO" > + fi > +} > + > +test_poison_by_memdev() > +{ > + find_memdev > + inject_poison_sysfs "$memdev" "0x40000000" > + inject_poison_sysfs "$memdev" "0x40001000" > + inject_poison_sysfs "$memdev" "0x600" > + inject_poison_sysfs "$memdev" "0x0" > + validate_poison_found "-m $memdev" 4 > + > + clear_poison_sysfs "$memdev" "0x40000000" > + clear_poison_sysfs "$memdev" "0x40001000" > + clear_poison_sysfs "$memdev" "0x600" > + clear_poison_sysfs "$memdev" "0x0" > + validate_poison_found "-m $memdev" 0 > +} > + > +test_poison_by_region() > +{ > + create_x2_region > + inject_poison_sysfs "$mem0" "0x40000000" > + inject_poison_sysfs "$mem1" "0x40000000" > + validate_poison_found "-r $region" 2 > + > + clear_poison_sysfs "$mem0" "0x40000000" > + clear_poison_sysfs "$mem1" "0x40000000" > + validate_poison_found "-r $region" 0 > +} > + > +# Turn tracing on. Note that 'cxl list --poison' does toggle the tracing. > +# Turning it on here allows the test user to also view inject and clear > +# trace events. 
> +echo 1 > /sys/kernel/tracing/events/cxl/cxl_poison/enable > + > +test_poison_by_memdev > +test_poison_by_region > + > +check_dmesg "$LINENO" > + > +modprobe -r cxl-test > diff --git a/test/meson.build b/test/meson.build > index a965a79fd6cb..d871e28e17ce 100644 > --- a/test/meson.build > +++ b/test/meson.build > @@ -160,6 +160,7 @@ cxl_events = find_program('cxl-events.sh') > cxl_sanitize = find_program('cxl-sanitize.sh') > cxl_destroy_region = find_program('cxl-destroy-region.sh') > cxl_qos_class = find_program('cxl-qos-class.sh') > +cxl_poison = find_program('cxl-poison.sh') > > tests = [ > [ 'libndctl', libndctl, 'ndctl' ], > @@ -192,6 +193,7 @@ tests = [ > [ 'cxl-sanitize.sh', cxl_sanitize, 'cxl' ], > [ 'cxl-destroy-region.sh', cxl_destroy_region, 'cxl' ], > [ 'cxl-qos-class.sh', cxl_qos_class, 'cxl' ], > + [ 'cxl-poison.sh', cxl_poison, 'cxl' ], > ] > > if get_option('destructive').enabled()
alison.schofield@intel.com wrote:
> From: Alison Schofield <alison.schofield@intel.com>
>
> Exercise cxl list, libcxl, and driver pieces of the get poison list
> pathway. Inject and clear poison using debugfs and use cxl-cli to
> read the poison list by memdev and by region.
>
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> ---
> test/cxl-poison.sh 137 +++++++++++++++++++++++++++++++++++++++++++++
> test/meson.build 2 +
> 2 files changed, 139 insertions(+)
> create mode 100644 test/cxl-poison.sh
>
> diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh
> new file mode 100644
> index 000000000000..af2e9dcd1a11
> --- /dev/null
> +++ b/test/cxl-poison.sh

[snip]

> +# Turn tracing on. Note that 'cxl list --poison' does toggle the tracing.

Hi,

I know it's trivial and not sure if I'm understanding the history of
the patch series correctly, but --poison seems to be an option that
was suggested before --media-errors. I'm wondering if it's okay to
leave this comment.

Thanks,
Wonjae
On Sat, Mar 16, 2024 at 08:03:34AM +0900, Wonjae Lee wrote:
> alison.schofield@intel.com wrote:
> > From: Alison Schofield <alison.schofield@intel.com>
> >
> > Exercise cxl list, libcxl, and driver pieces of the get poison list
> > pathway. Inject and clear poison using debugfs and use cxl-cli to
> > read the poison list by memdev and by region.
> >
> > Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> > ---
> > test/cxl-poison.sh 137 +++++++++++++++++++++++++++++++++++++++++++++
> > test/meson.build 2 +
> > 2 files changed, 139 insertions(+)
> > create mode 100644 test/cxl-poison.sh
> >
> > diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh
> > new file mode 100644
> > index 000000000000..af2e9dcd1a11
> > --- /dev/null
> > +++ b/test/cxl-poison.sh
>
> [snip]
>
> > +# Turn tracing on. Note that 'cxl list --poison' does toggle the tracing.
>
> Hi,
>
> I know it's trivial and not sure if I'm understanding the history of
> the patch series correctly, but --poison seems to be an option that
> was suggested before --media-errors. I'm wondering if it's okay to
> leave this comment.

Thanks Wonjae - I appreciate your find. I'll fix it up.

Alison

>
> Thanks,
> Wonjae
diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh new file mode 100644 index 000000000000..af2e9dcd1a11 --- /dev/null +++ b/test/cxl-poison.sh @@ -0,0 +1,137 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2023 Intel Corporation. All rights reserved. + +. "$(dirname "$0")"/common + +rc=77 + +set -ex + +trap 'err $LINENO' ERR + +check_prereq "jq" + +modprobe -r cxl_test +modprobe cxl_test + +rc=1 + +# THEORY OF OPERATION: Exercise cxl-cli and cxl driver ability to +# inject, clear, and get the poison list. Do it by memdev and by region. + +find_memdev() +{ + readarray -t capable_mems < <("$CXL" list -b "$CXL_TEST_BUS" -M | + jq -r ".[] | select(.pmem_size != null) | + select(.ram_size != null) | .memdev") + + if [ ${#capable_mems[@]} == 0 ]; then + echo "no memdevs found for test" + err "$LINENO" + fi + + memdev=${capable_mems[0]} +} + +create_x2_region() +{ + # Find an x2 decoder + decoder="$($CXL list -b "$CXL_TEST_BUS" -D -d root | jq -r ".[] | + select(.pmem_capable == true) | + select(.nr_targets == 2) | + .decoder")" + + # Find a memdev for each host-bridge interleave position + port_dev0="$($CXL list -T -d "$decoder" | jq -r ".[] | + .targets | .[] | select(.position == 0) | .target")" + port_dev1="$($CXL list -T -d "$decoder" | jq -r ".[] | + .targets | .[] | select(.position == 1) | .target")" + mem0="$($CXL list -M -p "$port_dev0" | jq -r ".[0].memdev")" + mem1="$($CXL list -M -p "$port_dev1" | jq -r ".[0].memdev")" + + region="$($CXL create-region -d "$decoder" -m "$mem0" "$mem1" | + jq -r ".region")" + if [[ ! $region ]]; then + echo "create-region failed for $decoder" + err "$LINENO" + fi + echo "$region" +} + +# When cxl-cli support for inject and clear arrives, replace +# the writes to /sys/kernel/debug with the new cxl commands. 
+ +inject_poison_sysfs() +{ + memdev="$1" + addr="$2" + + echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/inject_poison +} + +clear_poison_sysfs() +{ + memdev="$1" + addr="$2" + + echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/clear_poison +} + +validate_poison_found() +{ + list_by="$1" + nr_expect="$2" + + poison_list="$($CXL list "$list_by" --media-errors | + jq -r '.[].media_errors')" + if [[ ! $poison_list ]]; then + nr_found=0 + else + nr_found=$(jq "length" <<< "$poison_list") + fi + if [ "$nr_found" -ne "$nr_expect" ]; then + echo "$nr_expect poison records expected, $nr_found found" + err "$LINENO" + fi +} + +test_poison_by_memdev() +{ + find_memdev + inject_poison_sysfs "$memdev" "0x40000000" + inject_poison_sysfs "$memdev" "0x40001000" + inject_poison_sysfs "$memdev" "0x600" + inject_poison_sysfs "$memdev" "0x0" + validate_poison_found "-m $memdev" 4 + + clear_poison_sysfs "$memdev" "0x40000000" + clear_poison_sysfs "$memdev" "0x40001000" + clear_poison_sysfs "$memdev" "0x600" + clear_poison_sysfs "$memdev" "0x0" + validate_poison_found "-m $memdev" 0 +} + +test_poison_by_region() +{ + create_x2_region + inject_poison_sysfs "$mem0" "0x40000000" + inject_poison_sysfs "$mem1" "0x40000000" + validate_poison_found "-r $region" 2 + + clear_poison_sysfs "$mem0" "0x40000000" + clear_poison_sysfs "$mem1" "0x40000000" + validate_poison_found "-r $region" 0 +} + +# Turn tracing on. Note that 'cxl list --poison' does toggle the tracing. +# Turning it on here allows the test user to also view inject and clear +# trace events. 
+echo 1 > /sys/kernel/tracing/events/cxl/cxl_poison/enable + +test_poison_by_memdev +test_poison_by_region + +check_dmesg "$LINENO" + +modprobe -r cxl-test diff --git a/test/meson.build b/test/meson.build index a965a79fd6cb..d871e28e17ce 100644 --- a/test/meson.build +++ b/test/meson.build @@ -160,6 +160,7 @@ cxl_events = find_program('cxl-events.sh') cxl_sanitize = find_program('cxl-sanitize.sh') cxl_destroy_region = find_program('cxl-destroy-region.sh') cxl_qos_class = find_program('cxl-qos-class.sh') +cxl_poison = find_program('cxl-poison.sh') tests = [ [ 'libndctl', libndctl, 'ndctl' ], @@ -192,6 +193,7 @@ tests = [ [ 'cxl-sanitize.sh', cxl_sanitize, 'cxl' ], [ 'cxl-destroy-region.sh', cxl_destroy_region, 'cxl' ], [ 'cxl-qos-class.sh', cxl_qos_class, 'cxl' ], + [ 'cxl-poison.sh', cxl_poison, 'cxl' ], ] if get_option('destructive').enabled()